From cf8f3d4deb02a8fdc806c46d4112b69868544697 Mon Sep 17 00:00:00 2001 From: Tamizh Chelvam Raja Date: Wed, 15 Feb 2023 20:31:36 +0200 Subject: [PATCH 0001/2078] wifi: ath11k: Set ext passive scan flag to adjust passive scan start time Set the WMI_SCAN_FLAG_EXT_PASSIVE_SCAN_START_TIME_ENHANCE flag while sending the scan command. If this flag is enabled when the incoming scan request comes with a strict start time and its duration overlaps with next TBTT, then target adjust the start time accordingly for passive scan. Target supporting this feature will advertise WMI_TLV_SERVICE_PASSIVE_SCAN_START_TIME_ENHANCE. Tested-on: IPQ8074 hw2.0 AHB WLAN.HK.2.4.0.1-01467-QCAHKSWPL_SILICONZ-1 Signed-off-by: Tamizh Chelvam Raja Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20221222131720.11368-1-quic_tamizhr@quicinc.com --- drivers/net/wireless/ath/ath11k/wmi.c | 8 ++++++++ drivers/net/wireless/ath/ath11k/wmi.h | 3 +++ 2 files changed, 11 insertions(+) diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c index b3a7d7bfe17c2..7057c7916d5dc 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -2068,6 +2068,12 @@ void ath11k_wmi_start_scan_init(struct ath11k *ar, WMI_SCAN_EVENT_FOREIGN_CHAN | WMI_SCAN_EVENT_DEQUEUED; arg->scan_flags |= WMI_SCAN_CHAN_STAT_EVENT; + + if (test_bit(WMI_TLV_SERVICE_PASSIVE_SCAN_START_TIME_ENHANCE, + ar->ab->wmi_ab.svc_map)) + arg->scan_ctrl_flags_ext |= + WMI_SCAN_FLAG_EXT_PASSIVE_SCAN_START_TIME_ENHANCE; + arg->num_bssid = 1; /* fill bssid_list[0] with 0xff, otherwise bssid and RA will be @@ -2149,6 +2155,8 @@ ath11k_wmi_copy_scan_event_cntrl_flags(struct wmi_start_scan_cmd *cmd, /* for adaptive scan mode using 3 bits (21 - 23 bits) */ WMI_SCAN_SET_DWELL_MODE(cmd->scan_ctrl_flags, param->adaptive_dwell_time_mode); + + cmd->scan_ctrl_flags_ext = param->scan_ctrl_flags_ext; } int ath11k_wmi_send_scan_start_cmd(struct ath11k *ar, diff --git a/drivers/net/wireless/ath/ath11k/wmi.h b/drivers/net/wireless/ath/ath11k/wmi.h index 0a045af5419bd..2273d885f7fa7 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.h +++ b/drivers/net/wireless/ath/ath11k/wmi.h @@ -2093,6 +2093,7 @@ enum wmi_tlv_service { WMI_TLV_SERVICE_EXT2_MSG = 220, WMI_TLV_SERVICE_PEER_POWER_SAVE_DURATION_SUPPORT = 246, WMI_TLV_SERVICE_SRG_SRP_SPATIAL_REUSE_SUPPORT = 249, + WMI_TLV_SERVICE_PASSIVE_SCAN_START_TIME_ENHANCE = 263, /* The second 128 bits */ WMI_MAX_EXT_SERVICE = 256, @@ -3223,6 +3224,7 @@ struct wmi_start_scan_cmd { #define WMI_SCAN_DWELL_MODE_MASK 0x00E00000 #define WMI_SCAN_DWELL_MODE_SHIFT 21 +#define WMI_SCAN_FLAG_EXT_PASSIVE_SCAN_START_TIME_ENHANCE 0x00000800 enum { WMI_SCAN_DWELL_MODE_DEFAULT = 0, @@ -3270,6 +3272,7 @@ struct scan_req_params { }; u32 scan_events; }; + u32 scan_ctrl_flags_ext; u32 dwell_time_active; u32 dwell_time_active_2g; u32 dwell_time_passive; -- GitLab From 778f83f889e7fca37780d9640fcbd0229ae38eaa Mon Sep 17 00:00:00 2001 From: "Alexey V. Vissarionov" Date: Wed, 15 Feb 2023 20:31:37 +0200 Subject: [PATCH 0002/2078] wifi: ath6kl: minor fix for allocation size Although the "param" pointer occupies more or equal space compared to "*param", the allocation size should use the size of variable itself. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: bdcd81707973cf8a ("Add ath6kl cleaned up driver") Signed-off-by: Alexey V. 
Vissarionov Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230117110414.GC12547@altlinux.org --- drivers/net/wireless/ath/ath6kl/bmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath6kl/bmi.c b/drivers/net/wireless/ath/ath6kl/bmi.c index bde5a10d470c8..af98e871199d3 100644 --- a/drivers/net/wireless/ath/ath6kl/bmi.c +++ b/drivers/net/wireless/ath/ath6kl/bmi.c @@ -246,7 +246,7 @@ int ath6kl_bmi_execute(struct ath6kl *ar, u32 addr, u32 *param) return -EACCES; } - size = sizeof(cid) + sizeof(addr) + sizeof(param); + size = sizeof(cid) + sizeof(addr) + sizeof(*param); if (size > ar->bmi.max_cmd_size) { WARN_ON(1); return -EINVAL; -- GitLab From 480c9df5778774117546f6389be1a8dc8cc935db Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 15 Feb 2023 20:31:37 +0200 Subject: [PATCH 0003/2078] wifi: ath12k: Fix spelling mistakes in warning messages and comments There are quite a few spelling mistakes in warning messages and a lot of the comments. Fix these. Signed-off-by: Colin Ian King Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230214092122.265336-1-colin.i.king@gmail.com --- drivers/net/wireless/ath/ath12k/ce.c | 2 +- drivers/net/wireless/ath/ath12k/core.h | 2 +- drivers/net/wireless/ath/ath12k/dp.c | 2 +- drivers/net/wireless/ath/ath12k/dp.h | 6 +++--- drivers/net/wireless/ath/ath12k/dp_mon.c | 10 +++++----- drivers/net/wireless/ath/ath12k/dp_rx.c | 8 ++++---- drivers/net/wireless/ath/ath12k/hal.c | 2 +- drivers/net/wireless/ath/ath12k/hal.h | 12 ++++++------ drivers/net/wireless/ath/ath12k/hal_desc.h | 10 +++++----- drivers/net/wireless/ath/ath12k/rx_desc.h | 2 +- drivers/net/wireless/ath/ath12k/wmi.c | 6 +++--- drivers/net/wireless/ath/ath12k/wmi.h | 4 ++-- 12 files changed, 33 insertions(+), 33 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/ce.c b/drivers/net/wireless/ath/ath12k/ce.c index aed6987804bfc..be0d669d31fcc 100644 --- a/drivers/net/wireless/ath/ath12k/ce.c +++ b/drivers/net/wireless/ath/ath12k/ce.c @@ -946,7 +946,7 @@ int ath12k_ce_alloc_pipes(struct ath12k_base *ab) ret = ath12k_ce_alloc_pipe(ab, i); if (ret) { - /* Free any parial successful allocation */ + /* Free any partial successful allocation */ ath12k_ce_free_pipes(ab); return ret; } diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index a54ae74543c10..dffa687ee40ef 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -691,7 +691,7 @@ struct ath12k_base { /* Below regd's are protected by ab->data_lock */ /* This is the regd set for every radio - * by the firmware during initializatin + * by the firmware during initialization */ struct ieee80211_regdomain *default_regd[MAX_RADIOS]; /* This regd is set during dynamic country setting diff --git a/drivers/net/wireless/ath/ath12k/dp.c b/drivers/net/wireless/ath/ath12k/dp.c index eb0261500ab0a..9926d81c5fe45 100644 --- a/drivers/net/wireless/ath/ath12k/dp.c +++ b/drivers/net/wireless/ath/ath12k/dp.c @@ -1429,7 +1429,7 @@ static int ath12k_dp_cc_init(struct ath12k_base *ab) } if (dp->spt_info[i].paddr & ATH12K_SPT_4K_ALIGN_CHECK) { - ath12k_warn(ab, "SPT allocated memoty is not 4K aligned"); + ath12k_warn(ab, "SPT allocated memory is not 4K aligned"); ret = -EINVAL; goto free; } diff --git a/drivers/net/wireless/ath/ath12k/dp.h b/drivers/net/wireless/ath/ath12k/dp.h index 36a876d7f61d6..7c5dafce5a68d 100644 --- a/drivers/net/wireless/ath/ath12k/dp.h +++ b/drivers/net/wireless/ath/ath12k/dp.h @@ 
-371,7 +371,7 @@ struct ath12k_dp { #define HTT_TX_WBM_COMP_STATUS_OFFSET 8 -/* HTT tx completion is overlayed in wbm_release_ring */ +/* HTT tx completion is overlaid in wbm_release_ring */ #define HTT_TX_WBM_COMP_INFO0_STATUS GENMASK(16, 13) #define HTT_TX_WBM_COMP_INFO1_REINJECT_REASON GENMASK(3, 0) #define HTT_TX_WBM_COMP_INFO1_EXCEPTION_FRAME BIT(4) @@ -545,7 +545,7 @@ enum htt_srng_ring_id { * 3'b010: 4 usec * 3'b011: 8 usec (default) * 3'b100: 16 usec - * Others: Reserverd + * Others: Reserved * b'19 - response_required: * Host needs HTT_T2H_MSG_TYPE_SRING_SETUP_DONE as response * b'20:31 - reserved: reserved for future use @@ -1126,7 +1126,7 @@ struct htt_tx_ring_selection_cfg_cmd { __le32 tlv_filter_mask_in1; __le32 tlv_filter_mask_in2; __le32 tlv_filter_mask_in3; - __le32 reserverd[3]; + __le32 reserved[3]; } __packed; #define HTT_TX_RING_TLV_FILTER_MGMT_DMA_LEN GENMASK(3, 0) diff --git a/drivers/net/wireless/ath/ath12k/dp_mon.c b/drivers/net/wireless/ath/ath12k/dp_mon.c index a214797c96a2d..4f93e4c95fed2 100644 --- a/drivers/net/wireless/ath/ath12k/dp_mon.c +++ b/drivers/net/wireless/ath/ath12k/dp_mon.c @@ -813,7 +813,7 @@ ath12k_dp_mon_rx_parse_status_tlv(struct ath12k_base *ab, spin_unlock_bh(&buf_ring->idr_lock); if (unlikely(!msdu)) { - ath12k_warn(ab, "montior destination with invalid buf_id %d\n", + ath12k_warn(ab, "monitor destination with invalid buf_id %d\n", buf_id); return HAL_RX_MON_STATUS_PPDU_NOT_DONE; } @@ -1124,7 +1124,7 @@ static void ath12k_dp_mon_rx_deliver_msdu(struct ath12k *ar, struct napi_struct /* PN for multicast packets are not validate in HW, * so skip 802.3 rx path - * Also, fast_rx expectes the STA to be authorized, hence + * Also, fast_rx expects the STA to be authorized, hence * eapol packets are sent in slow path. */ if (decap == DP_RX_DECAP_TYPE_ETHERNET2_DIX && !is_eapol_tkip && @@ -1917,7 +1917,7 @@ ath12k_dp_mon_tx_parse_status_tlv(struct ath12k_base *ab, spin_unlock_bh(&buf_ring->idr_lock); if (unlikely(!msdu)) { - ath12k_warn(ab, "montior destination with invalid buf_id %d\n", + ath12k_warn(ab, "monitor destination with invalid buf_id %d\n", buf_id); return DP_MON_TX_STATUS_PPDU_NOT_DONE; } @@ -2110,7 +2110,7 @@ int ath12k_dp_mon_srng_process(struct ath12k *ar, int mac_id, int *budget, spin_unlock_bh(&buf_ring->idr_lock); if (unlikely(!skb)) { - ath12k_warn(ab, "montior destination with invalid buf_id %d\n", + ath12k_warn(ab, "monitor destination with invalid buf_id %d\n", buf_id); goto move_next; } @@ -2511,7 +2511,7 @@ int ath12k_dp_mon_rx_process_stats(struct ath12k *ar, int mac_id, spin_unlock_bh(&buf_ring->idr_lock); if (unlikely(!skb)) { - ath12k_warn(ab, "montior destination with invalid buf_id %d\n", + ath12k_warn(ab, "monitor destination with invalid buf_id %d\n", buf_id); goto move_next; } diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index 83a43ad48c512..eb67b3409f858 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -2443,7 +2443,7 @@ static void ath12k_dp_rx_deliver_msdu(struct ath12k *ar, struct napi_struct *nap /* PN for multicast packets are not validate in HW, * so skip 802.3 rx path - * Also, fast_rx expectes the STA to be authorized, hence + * Also, fast_rx expects the STA to be authorized, hence * eapol packets are sent in slow path. 
*/ if (decap == DP_RX_DECAP_TYPE_ETHERNET2_DIX && !is_eapol && @@ -2611,7 +2611,7 @@ try_again: if (!desc_info) { desc_info = ath12k_dp_get_rx_desc(ab, cookie); if (!desc_info) { - ath12k_warn(ab, "Invalid cookie in manual desc retrival"); + ath12k_warn(ab, "Invalid cookie in manual desc retrieval"); continue; } } @@ -3297,7 +3297,7 @@ ath12k_dp_process_rx_err_buf(struct ath12k *ar, struct hal_reo_dest_ring *desc, if (!desc_info) { desc_info = ath12k_dp_get_rx_desc(ab, cookie); if (!desc_info) { - ath12k_warn(ab, "Invalid cookie in manual desc retrival"); + ath12k_warn(ab, "Invalid cookie in manual desc retrieval"); return -EINVAL; } } @@ -3718,7 +3718,7 @@ int ath12k_dp_rx_process_wbm_err(struct ath12k_base *ab, if (!desc_info) { desc_info = ath12k_dp_get_rx_desc(ab, err_info.cookie); if (!desc_info) { - ath12k_warn(ab, "Invalid cookie in manual desc retrival"); + ath12k_warn(ab, "Invalid cookie in manual desc retrieval"); continue; } } diff --git a/drivers/net/wireless/ath/ath12k/hal.c b/drivers/net/wireless/ath/ath12k/hal.c index 95d04819083fe..0ec53afe9915d 100644 --- a/drivers/net/wireless/ath/ath12k/hal.c +++ b/drivers/net/wireless/ath/ath12k/hal.c @@ -609,7 +609,7 @@ static int ath12k_hal_srng_create_config_qcn9274(struct ath12k_base *ab) HAL_WBM0_RELEASE_RING_BASE_LSB(ab); s->reg_size[1] = HAL_WBM1_RELEASE_RING_HP - HAL_WBM0_RELEASE_RING_HP; - /* Some LMAC rings are not accesed from the host: + /* Some LMAC rings are not accessed from the host: * RXDMA_BUG, RXDMA_DST, RXDMA_MONITOR_BUF, RXDMA_MONITOR_STATUS, * RXDMA_MONITOR_DST, RXDMA_MONITOR_DESC, RXDMA_DIR_BUF_SRC, * RXDMA_RX_MONITOR_BUF, TX_MONITOR_BUF, TX_MONITOR_DST, SW2RXDMA diff --git a/drivers/net/wireless/ath/ath12k/hal.h b/drivers/net/wireless/ath/ath12k/hal.h index dfbd8bce70e52..0d4fa12ea622b 100644 --- a/drivers/net/wireless/ath/ath12k/hal.h +++ b/drivers/net/wireless/ath/ath12k/hal.h @@ -270,7 +270,7 @@ struct ath12k_base; #define HAL_WBM_SW_COOKIE_CONV_CFG_WBM2SW4_EN BIT(5) #define HAL_WBM_SW_COOKIE_CONV_CFG_GLOBAL_EN BIT(8) -/* TCL ring feild mask and offset */ +/* TCL ring field mask and offset */ #define HAL_TCL1_RING_BASE_MSB_RING_SIZE GENMASK(27, 8) #define HAL_TCL1_RING_BASE_MSB_RING_BASE_ADDR_MSB GENMASK(7, 0) #define HAL_TCL1_RING_ID_ENTRY_SIZE GENMASK(7, 0) @@ -296,7 +296,7 @@ struct ath12k_base; #define HAL_TCL1_RING_FIELD_DSCP_TID_MAP6 GENMASK(20, 18) #define HAL_TCL1_RING_FIELD_DSCP_TID_MAP7 GENMASK(23, 21) -/* REO ring feild mask and offset */ +/* REO ring field mask and offset */ #define HAL_REO1_RING_BASE_MSB_RING_SIZE GENMASK(27, 8) #define HAL_REO1_RING_BASE_MSB_RING_BASE_ADDR_MSB GENMASK(7, 0) #define HAL_REO1_RING_ID_RING_ID GENMASK(15, 8) @@ -738,7 +738,7 @@ struct hal_srng { } u; }; -/* Interrupt mitigation - Batch threshold in terms of numer of frames */ +/* Interrupt mitigation - Batch threshold in terms of number of frames */ #define HAL_SRNG_INT_BATCH_THRESHOLD_TX 256 #define HAL_SRNG_INT_BATCH_THRESHOLD_RX 128 #define HAL_SRNG_INT_BATCH_THRESHOLD_OTHER 1 @@ -813,7 +813,7 @@ enum hal_rx_buf_return_buf_manager { #define HAL_REO_CMD_FLG_UNBLK_RESOURCE BIT(7) #define HAL_REO_CMD_FLG_UNBLK_CACHE BIT(8) -/* Should be matching with HAL_REO_UPD_RX_QUEUE_INFO0_UPD_* feilds */ +/* Should be matching with HAL_REO_UPD_RX_QUEUE_INFO0_UPD_* fields */ #define HAL_REO_CMD_UPD0_RX_QUEUE_NUM BIT(8) #define HAL_REO_CMD_UPD0_VLD BIT(9) #define HAL_REO_CMD_UPD0_ALDC BIT(10) @@ -838,7 +838,7 @@ enum hal_rx_buf_return_buf_manager { #define HAL_REO_CMD_UPD0_PN_VALID BIT(29) #define HAL_REO_CMD_UPD0_PN BIT(30) -/* 
Should be matching with HAL_REO_UPD_RX_QUEUE_INFO1_* feilds */ +/* Should be matching with HAL_REO_UPD_RX_QUEUE_INFO1_* fields */ #define HAL_REO_CMD_UPD1_VLD BIT(16) #define HAL_REO_CMD_UPD1_ALDC GENMASK(18, 17) #define HAL_REO_CMD_UPD1_DIS_DUP_DETECTION BIT(19) @@ -854,7 +854,7 @@ enum hal_rx_buf_return_buf_manager { #define HAL_REO_CMD_UPD1_PN_HANDLE_ENABLE BIT(30) #define HAL_REO_CMD_UPD1_IGNORE_AMPDU_FLG BIT(31) -/* Should be matching with HAL_REO_UPD_RX_QUEUE_INFO2_* feilds */ +/* Should be matching with HAL_REO_UPD_RX_QUEUE_INFO2_* fields */ #define HAL_REO_CMD_UPD2_SVLD BIT(10) #define HAL_REO_CMD_UPD2_SSN GENMASK(22, 11) #define HAL_REO_CMD_UPD2_SEQ_2K_ERR BIT(23) diff --git a/drivers/net/wireless/ath/ath12k/hal_desc.h b/drivers/net/wireless/ath/ath12k/hal_desc.h index 2250ca2d19a3d..6c17adc6d60b5 100644 --- a/drivers/net/wireless/ath/ath12k/hal_desc.h +++ b/drivers/net/wireless/ath/ath12k/hal_desc.h @@ -706,7 +706,7 @@ struct rx_msdu_desc { * * msdu_continuation * When set, this MSDU buffer was not able to hold the entire MSDU. - * The next buffer will therefor contain additional information + * The next buffer will therefore contain additional information * related to this MSDU. * * msdu_length @@ -1294,7 +1294,7 @@ struct hal_tcl_data_cmd { * link descriptor. * * tcl_cmd_type - * used to select the type of TCL Command decriptor + * used to select the type of TCL Command descriptor * * desc_type * Indicates the type of address provided in the buf_addr_info. @@ -1408,7 +1408,7 @@ struct hal_tcl_data_cmd { * index_loop_override * When set, address search and packet routing is forced to use * 'search_index' instead of following the register configuration - * seleced by Bank_id. + * selected by Bank_id. * * ring_id * The buffer pointer ring ID. @@ -1990,7 +1990,7 @@ struct hal_wbm_release_ring { * Producer: SW/TQM/RXDMA/REO/SWITCH * Consumer: WBM/SW/FW * - * HTT tx status is overlayed on wbm_release ring on 4-byte words 2, 3, 4 and 5 + * HTT tx status is overlaid on wbm_release ring on 4-byte words 2, 3, 4 and 5 * for software based completions. * * buf_addr_info @@ -2552,7 +2552,7 @@ struct hal_reo_status_hdr { * commands. * * execution_time (in us) - * The amount of time REO took to excecute the command. Note that + * The amount of time REO took to execute the command. Note that * this time does not include the duration of the command waiting * in the command ring, before the execution started. 
* diff --git a/drivers/net/wireless/ath/ath12k/rx_desc.h b/drivers/net/wireless/ath/ath12k/rx_desc.h index 5feaff6450ad2..f99556a253e56 100644 --- a/drivers/net/wireless/ath/ath12k/rx_desc.h +++ b/drivers/net/wireless/ath/ath12k/rx_desc.h @@ -1072,7 +1072,7 @@ struct rx_msdu_end_qcn9274 { * * l4_offset * Depending upon mode bit, this field either indicates the - * L4 offset nin bytes from the start of RX_HEADER (only valid + * L4 offset in bytes from the start of RX_HEADER (only valid * if either ipv4_proto or ipv6_proto is set to 1) or indicates * the offset in bytes to the start of TCP or UDP header from * the start of the IP header after decapsulation (Only valid if diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index f6df14149531e..3e6991120e538 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -494,7 +494,7 @@ ath12k_pull_mac_phy_cap_svc_ready_ext(struct ath12k_wmi_pdev *wmi_handle, /* tx/rx chainmask reported from fw depends on the actual hw chains used, * For example, for 4x4 capable macphys, first 4 chains can be used for first - * mac and the remaing 4 chains can be used for the second mac or vice-versa. + * mac and the remaining 4 chains can be used for the second mac or vice-versa. * In this case, tx/rx chainmask 0xf will be advertised for first mac and 0xf0 * will be advertised for second mac or vice-versa. Compute the shift value * for tx/rx chainmask which will be used to advertise supported ht/vht rates to @@ -1743,7 +1743,7 @@ int ath12k_wmi_vdev_install_key(struct ath12k *ar, int ret, len, key_len_aligned; /* WMI_TAG_ARRAY_BYTE needs to be aligned with 4, the actual key - * length is specifed in cmd->key_len. + * length is specified in cmd->key_len. */ key_len_aligned = roundup(arg->key_len, 4); @@ -5995,7 +5995,7 @@ static void ath12k_service_available_event(struct ath12k_base *ab, struct sk_buf } /* TODO: Use wmi_service_segment_offset information to get the service - * especially when more services are advertised in multiple sevice + * especially when more services are advertised in multiple service * available events. */ for (i = 0, j = WMI_MAX_SERVICE; diff --git a/drivers/net/wireless/ath/ath12k/wmi.h b/drivers/net/wireless/ath/ath12k/wmi.h index 84e3fb918e43f..08a8c9e0f59fb 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.h +++ b/drivers/net/wireless/ath/ath12k/wmi.h @@ -4002,7 +4002,7 @@ struct ath12k_wmi_pdev_radar_event { } __packed; struct wmi_pdev_temperature_event { - /* temperature value in Celcius degree */ + /* temperature value in Celsius degree */ a_sle32 temp; __le32 pdev_id; } __packed; @@ -4192,7 +4192,7 @@ enum wmi_sta_ps_param_tx_wake_threshold { */ enum wmi_sta_ps_param_pspoll_count { WMI_STA_PS_PSPOLL_COUNT_NO_MAX = 0, - /* Values greater than 0 indicate the maximum numer of PS-Poll frames + /* Values greater than 0 indicate the maximum number of PS-Poll frames * FW will send before waking up. */ }; -- GitLab From 731e1b36656a30ffa9c77de673695cdc733d6c92 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Wed, 15 Feb 2023 20:31:37 +0200 Subject: [PATCH 0004/2078] wifi: ath12k: dp_mon: Fix unsigned comparison with less than zero The return value from the call to idr_alloc() is int. However, the return value is being assigned to an unsigned int variable 'buf_id', so making 'buf_id' an int. 
Eliminate the following warning: ./drivers/net/wireless/ath/ath12k/dp_mon.c:1300:15-21: WARNING: Unsigned expression compared with zero: buf_id < 0 Reported-by: Abaci Robot Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=4060 Signed-off-by: Yang Li Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230215011453.73466-1-yang.lee@linux.alibaba.com --- drivers/net/wireless/ath/ath12k/dp_mon.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/dp_mon.c b/drivers/net/wireless/ath/ath12k/dp_mon.c index 4f93e4c95fed2..85f8f802340dd 100644 --- a/drivers/net/wireless/ath/ath12k/dp_mon.c +++ b/drivers/net/wireless/ath/ath12k/dp_mon.c @@ -1268,7 +1268,8 @@ int ath12k_dp_mon_buf_replenish(struct ath12k_base *ab, struct sk_buff *skb; struct hal_srng *srng; dma_addr_t paddr; - u32 cookie, buf_id; + u32 cookie; + int buf_id; srng = &ab->hal.srng_list[buf_ring->refill_buf_ring.ring_id]; spin_lock_bh(&srng->lock); -- GitLab From df8e8db22c25f3d0f75ee0c79342d4a0841f4757 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Wed, 15 Feb 2023 20:31:38 +0200 Subject: [PATCH 0005/2078] wifi: ath12k: dp_mon: clean up some inconsistent indentings drivers/net/wireless/ath/ath12k/dp_mon.c:532 ath12k_dp_mon_parse_he_sig_su() warn: inconsistent indenting Reported-by: Abaci Robot Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=4062 Signed-off-by: Yang Li Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230215012548.90989-1-yang.lee@linux.alibaba.com --- drivers/net/wireless/ath/ath12k/dp_mon.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/dp_mon.c b/drivers/net/wireless/ath/ath12k/dp_mon.c index 85f8f802340dd..f1e57e98bdc60 100644 --- a/drivers/net/wireless/ath/ath12k/dp_mon.c +++ b/drivers/net/wireless/ath/ath12k/dp_mon.c @@ -529,12 +529,12 @@ static void ath12k_dp_mon_parse_he_sig_su(u8 *tlv_data, case 3: if (he_dcm && he_stbc) { he_gi = HE_GI_0_8; - he_ltf = HE_LTF_4_X; + he_ltf = HE_LTF_4_X; } else { he_gi = HE_GI_3_2; he_ltf = HE_LTF_4_X; - } - break; + } + break; } ppdu_info->gi = he_gi; value = he_gi << HE_GI_SHIFT; -- GitLab From 3c3ab8c9a29662ef25f8f62b8f118cd9b0eadd88 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Wed, 15 Feb 2023 20:31:38 +0200 Subject: [PATCH 0006/2078] wifi: ath10k: Remove the unused function shadow_dst_wr_ind_addr() and ath10k_ce_error_intr_enable() The function shadow_dst_wr_ind_addr() and ath10k_ce_error_intr_enable() are defined in the ce.c file, the code calling them has been removed, so remove these unused functions. 
Eliminate the following warnings: drivers/net/wireless/ath/ath10k/ce.c:80:19: warning: unused function 'shadow_dst_wr_ind_addr' drivers/net/wireless/ath/ath10k/ce.c:441:20: warning: unused function 'ath10k_ce_error_intr_enable' Reported-by: Abaci Robot Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=4063 Signed-off-by: Yang Li Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230215014058.116775-1-yang.lee@linux.alibaba.com --- drivers/net/wireless/ath/ath10k/ce.c | 52 ---------------------------- 1 file changed, 52 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/ce.c b/drivers/net/wireless/ath/ath10k/ce.c index c2f3bd35c3928..b656cfc03648f 100644 --- a/drivers/net/wireless/ath/ath10k/ce.c +++ b/drivers/net/wireless/ath/ath10k/ce.c @@ -77,45 +77,6 @@ static inline u32 shadow_sr_wr_ind_addr(struct ath10k *ar, return addr; } -static inline u32 shadow_dst_wr_ind_addr(struct ath10k *ar, - struct ath10k_ce_pipe *ce_state) -{ - u32 ce_id = ce_state->id; - u32 addr = 0; - - switch (ce_id) { - case 1: - addr = 0x00032034; - break; - case 2: - addr = 0x00032038; - break; - case 5: - addr = 0x00032044; - break; - case 7: - addr = 0x0003204C; - break; - case 8: - addr = 0x00032050; - break; - case 9: - addr = 0x00032054; - break; - case 10: - addr = 0x00032058; - break; - case 11: - addr = 0x0003205C; - break; - default: - ath10k_warn(ar, "invalid CE id: %d", ce_id); - break; - } - - return addr; -} - static inline unsigned int ath10k_set_ring_byte(unsigned int offset, struct ath10k_hw_ce_regs_addr_map *addr_map) @@ -438,19 +399,6 @@ static inline void ath10k_ce_watermark_intr_disable(struct ath10k *ar, host_ie_addr & ~(wm_regs->wm_mask)); } -static inline void ath10k_ce_error_intr_enable(struct ath10k *ar, - u32 ce_ctrl_addr) -{ - struct ath10k_hw_ce_misc_regs *misc_regs = ar->hw_ce_regs->misc_regs; - - u32 misc_ie_addr = ath10k_ce_read32(ar, ce_ctrl_addr + - ar->hw_ce_regs->misc_ie_addr); - - ath10k_ce_write32(ar, - ce_ctrl_addr + ar->hw_ce_regs->misc_ie_addr, - misc_ie_addr | misc_regs->err_mask); -} - static inline void ath10k_ce_error_intr_disable(struct ath10k *ar, u32 ce_ctrl_addr) { -- GitLab From bfcc8ba45eb87bfaaff900bbad2b87b204899d41 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 15 Feb 2023 20:31:38 +0200 Subject: [PATCH 0007/2078] wifi: ath: Silence memcpy run-time false positive warning The memcpy() in ath_key_config() was attempting to write across neighboring struct members in struct ath_keyval. Introduce a wrapping struct_group, kv_values, to be the addressable target of the memcpy without overflowing an individual member. Silences the false positive run-time warning: memcpy: detected field-spanning write (size 32) of single field "hk.kv_val" at drivers/net/wireless/ath/key.c:506 (size 16) Link: https://bbs.archlinux.org/viewtopic.php?id=282254 Cc: Kalle Valo Cc: "David S. 
Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: linux-wireless@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230210054310.never.554-kees@kernel.org --- drivers/net/wireless/ath/ath.h | 12 +++++++----- drivers/net/wireless/ath/key.c | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/ath/ath.h b/drivers/net/wireless/ath/ath.h index f083fb9038c36..f02a308a9ffc5 100644 --- a/drivers/net/wireless/ath/ath.h +++ b/drivers/net/wireless/ath/ath.h @@ -96,11 +96,13 @@ struct ath_keyval { u8 kv_type; u8 kv_pad; u16 kv_len; - u8 kv_val[16]; /* TK */ - u8 kv_mic[8]; /* Michael MIC key */ - u8 kv_txmic[8]; /* Michael MIC TX key (used only if the hardware - * supports both MIC keys in the same key cache entry; - * in that case, kv_mic is the RX key) */ + struct_group(kv_values, + u8 kv_val[16]; /* TK */ + u8 kv_mic[8]; /* Michael MIC key */ + u8 kv_txmic[8]; /* Michael MIC TX key (used only if the hardware + * supports both MIC keys in the same key cache entry; + * in that case, kv_mic is the RX key) */ + ); }; enum ath_cipher { diff --git a/drivers/net/wireless/ath/key.c b/drivers/net/wireless/ath/key.c index 61b59a804e308..b7b61d4f02bae 100644 --- a/drivers/net/wireless/ath/key.c +++ b/drivers/net/wireless/ath/key.c @@ -503,7 +503,7 @@ int ath_key_config(struct ath_common *common, hk.kv_len = key->keylen; if (key->keylen) - memcpy(hk.kv_val, key->key, key->keylen); + memcpy(&hk.kv_values, key->key, key->keylen); if (!(key->flags & IEEE80211_KEY_FLAG_PAIRWISE)) { switch (vif->type) { -- GitLab From 8c464d16809fa02982f6341ea598ec5d07457f19 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 16 Feb 2023 15:16:14 +0300 Subject: [PATCH 0008/2078] wifi: ath12k: use kfree_skb() instead of kfree() Sk_buffs are supposed to be freed with kfree_skb(). Fixes: d889913205cf ("wifi: ath12k: driver for Qualcomm Wi-Fi 7 devices") Signed-off-by: Dan Carpenter Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/Y+4ejiYakhEvEw7c@kili --- drivers/net/wireless/ath/ath12k/dp_tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/dp_tx.c b/drivers/net/wireless/ath/ath12k/dp_tx.c index 95294f35155c4..fd8d850f9818f 100644 --- a/drivers/net/wireless/ath/ath12k/dp_tx.c +++ b/drivers/net/wireless/ath/ath12k/dp_tx.c @@ -270,7 +270,7 @@ tcl_ring_sel: skb_ext_desc->len, DMA_TO_DEVICE); ret = dma_mapping_error(ab->dev, ti.paddr); if (ret) { - kfree(skb_ext_desc); + kfree_skb(skb_ext_desc); goto fail_unmap_dma; } -- GitLab From 342fcde9d91460f01f65707e16368a1571271a3a Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 17 Feb 2023 11:00:31 +0800 Subject: [PATCH 0009/2078] wifi: ath11k: fix return value check in ath11k_ahb_probe() ioremap() returns NULL pointer not PTR_ERR() when it fails, so replace the IS_ERR() check with NULL pointer check. 
Fixes: b42b3678c91f ("wifi: ath11k: remap ce register space for IPQ5018") Signed-off-by: Yang Yingliang Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230217030031.4021289-1-yangyingliang@huawei.com --- drivers/net/wireless/ath/ath11k/ahb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath11k/ahb.c b/drivers/net/wireless/ath/ath11k/ahb.c index cd48eca494ed5..022caacd28220 100644 --- a/drivers/net/wireless/ath/ath11k/ahb.c +++ b/drivers/net/wireless/ath/ath11k/ahb.c @@ -1174,7 +1174,7 @@ static int ath11k_ahb_probe(struct platform_device *pdev) * to a new space for accessing them. */ ab->mem_ce = ioremap(ce_remap->base, ce_remap->size); - if (IS_ERR(ab->mem_ce)) { + if (!ab->mem_ce) { dev_err(&pdev->dev, "ce ioremap error\n"); ret = -ENOMEM; goto err_core_free; -- GitLab From 7654cc03eb699297130b693ec34e25f77b17c947 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Thu, 16 Feb 2023 22:23:01 +0300 Subject: [PATCH 0010/2078] wifi: ath9k: hif_usb: fix memory leak of remain_skbs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit hif_dev->remain_skb is allocated and used exclusively in ath9k_hif_usb_rx_stream(). It is implied that an allocated remain_skb is processed and subsequently freed (in error paths) only during the next call of ath9k_hif_usb_rx_stream(). So, if the urbs are deallocated between those two calls due to the device deinitialization or suspend, it is possible that ath9k_hif_usb_rx_stream() is not called next time and the allocated remain_skb is leaked. Our local Syzkaller instance was able to trigger that. remain_skb makes sense when receiving two consecutive urbs which are logically linked together, i.e. a specific data field from the first skb indicates a cached skb to be allocated, memcpy'd with some data and subsequently processed in the next call to ath9k_hif_usb_rx_stream(). Urbs deallocation supposedly makes that link irrelevant so we need to free the cached skb in those cases. Fix the leak by introducing a function to explicitly free remain_skb (if it is not NULL) when the rx urbs have been deallocated. remain_skb is NULL when it has not been allocated at all (hif_dev struct is kzalloced) or when it has been processed in next call to ath9k_hif_usb_rx_stream(). Found by Linux Verification Center (linuxtesting.org) with Syzkaller. Fixes: fb9987d0f748 ("ath9k_htc: Support for AR9271 chipset.") Signed-off-by: Fedor Pchelkin Signed-off-by: Alexey Khoroshilov Acked-by: Toke Høiland-Jørgensen Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230216192301.171225-1-pchelkin@ispras.ru --- drivers/net/wireless/ath/ath9k/hif_usb.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/net/wireless/ath/ath9k/hif_usb.c b/drivers/net/wireless/ath/ath9k/hif_usb.c index f521dfa2f1945..e0130beb304df 100644 --- a/drivers/net/wireless/ath/ath9k/hif_usb.c +++ b/drivers/net/wireless/ath/ath9k/hif_usb.c @@ -534,6 +534,24 @@ static struct ath9k_htc_hif hif_usb = { .send = hif_usb_send, }; +/* Need to free remain_skb allocated in ath9k_hif_usb_rx_stream + * in case ath9k_hif_usb_rx_stream wasn't called next time to + * process the buffer and subsequently free it. 
+ */ +static void ath9k_hif_usb_free_rx_remain_skb(struct hif_device_usb *hif_dev) +{ + unsigned long flags; + + spin_lock_irqsave(&hif_dev->rx_lock, flags); + if (hif_dev->remain_skb) { + dev_kfree_skb_any(hif_dev->remain_skb); + hif_dev->remain_skb = NULL; + hif_dev->rx_remain_len = 0; + RX_STAT_INC(hif_dev, skb_dropped); + } + spin_unlock_irqrestore(&hif_dev->rx_lock, flags); +} + static void ath9k_hif_usb_rx_stream(struct hif_device_usb *hif_dev, struct sk_buff *skb) { @@ -868,6 +886,7 @@ err: static void ath9k_hif_usb_dealloc_rx_urbs(struct hif_device_usb *hif_dev) { usb_kill_anchored_urbs(&hif_dev->rx_submitted); + ath9k_hif_usb_free_rx_remain_skb(hif_dev); } static int ath9k_hif_usb_alloc_rx_urbs(struct hif_device_usb *hif_dev) -- GitLab From 95a389e2ff3212d866cc51c77d682d2934074eb8 Mon Sep 17 00:00:00 2001 From: Ramya Gnanasekar Date: Sun, 22 Jan 2023 07:19:36 +0530 Subject: [PATCH 0011/2078] wifi: ath12k: Handle lock during peer_id find ath12k_peer_find_by_id() requires that the caller hold the ab->base_lock. Currently the WBM error path does not hold the lock and calling that function, leads to the following lockdep_assert()in QCN9274: [105162.160893] ------------[ cut here ]------------ [105162.160916] WARNING: CPU: 3 PID: 0 at drivers/net/wireless/ath/ath12k/peer.c:71 ath12k_peer_find_by_id+0x52/0x60 [ath12k] [105162.160933] Modules linked in: ath12k(O) qrtr_mhi qrtr mac80211 cfg80211 mhi qmi_helpers libarc4 nvme nvme_core [last unloaded: ath12k(O)] [105162.160967] CPU: 3 PID: 0 Comm: swapper/3 Tainted: G W O 6.1.0-rc2+ #3 [105162.160972] Hardware name: Intel(R) Client Systems NUC8i7HVK/NUC8i7HVB, BIOS HNKBLi70.86A.0056.2019.0506.1527 05/06/2019 [105162.160977] RIP: 0010:ath12k_peer_find_by_id+0x52/0x60 [ath12k] [105162.160990] Code: 07 eb 0f 39 68 24 74 0a 48 8b 00 48 39 f8 75 f3 31 c0 5b 5d c3 48 8d bf b0 f2 00 00 be ff ff ff ff e8 22 20 c4 e2 85 c0 75 bf <0f> 0b eb bb 66 2e 0f 1f 84 00 00 00 00 00 41 54 4c 8d a7 98 f2 00 [105162.160996] RSP: 0018:ffffa223001acc60 EFLAGS: 00010246 [105162.161003] RAX: 0000000000000000 RBX: ffff9f0573940000 RCX: 0000000000000000 [105162.161008] RDX: 0000000000000001 RSI: ffffffffa3951c8e RDI: ffffffffa39a96d7 [105162.161013] RBP: 000000000000000a R08: 0000000000000000 R09: 0000000000000000 [105162.161017] R10: ffffa223001acb40 R11: ffffffffa3d57c60 R12: ffff9f057394f2e0 [105162.161022] R13: ffff9f0573940000 R14: ffff9f04ecd659c0 R15: ffff9f04d5a9b040 [105162.161026] FS: 0000000000000000(0000) GS:ffff9f0575600000(0000) knlGS:0000000000000000 [105162.161031] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [105162.161036] CR2: 00001d5c8277a008 CR3: 00000001e6224006 CR4: 00000000003706e0 [105162.161041] Call Trace: [105162.161046] [105162.161051] ath12k_dp_rx_process_wbm_err+0x6da/0xaf0 [ath12k] [105162.161072] ? ath12k_dp_rx_process_err+0x80e/0x15a0 [ath12k] [105162.161084] ? __lock_acquire+0x4ca/0x1a60 [105162.161104] ath12k_dp_service_srng+0x263/0x310 [ath12k] [105162.161120] ath12k_pci_ext_grp_napi_poll+0x1c/0x70 [ath12k] [105162.161133] __napi_poll+0x22/0x260 [105162.161141] net_rx_action+0x2f8/0x380 [105162.161153] __do_softirq+0xd0/0x4c9 [105162.161162] irq_exit_rcu+0x88/0xe0 [105162.161169] common_interrupt+0xa5/0xc0 [105162.161174] [105162.161179] [105162.161184] asm_common_interrupt+0x22/0x40 Handle spin lock/unlock in WBM error path to hold the necessary lock expected by ath12k_peer_find_by_id(). 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.0-03171-QCAHKSWPL_SILICONZ-1 Signed-off-by: Ramya Gnanasekar Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230122014936.3594-1-quic_rgnanase@quicinc.com --- drivers/net/wireless/ath/ath12k/dp_rx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index eb67b3409f858..0adcbcfa0db57 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -3494,11 +3494,14 @@ static int ath12k_dp_rx_h_null_q_desc(struct ath12k *ar, struct sk_buff *msdu, msdu_len = ath12k_dp_rx_h_msdu_len(ab, desc); peer_id = ath12k_dp_rx_h_peer_id(ab, desc); + spin_lock(&ab->base_lock); if (!ath12k_peer_find_by_id(ab, peer_id)) { + spin_unlock(&ab->base_lock); ath12k_dbg(ab, ATH12K_DBG_DATA, "invalid peer id received in wbm err pkt%d\n", peer_id); return -EINVAL; } + spin_unlock(&ab->base_lock); if (!rxcb->is_frag && ((msdu_len + hal_rx_desc_sz) > DP_RX_BUFFER_SIZE)) { /* First buffer will be freed by the caller, so deduct it's length */ -- GitLab From 80e396586d0a94c42015dd9472176d89a3b0e4ca Mon Sep 17 00:00:00 2001 From: Ramya Gnanasekar Date: Mon, 23 Jan 2023 15:21:41 +0530 Subject: [PATCH 0012/2078] wifi: ath12k: PCI ops for wakeup/release MHI Wakeup/release MHI is not needed before pci_read/write for QCN9274. Since wakeup & release MHI is enabled for all QCN9274 and WCN7850, below MHI assert is seen in QCN9274 [ 784.906613] BUG: sleeping function called from invalid context at drivers/bus/mhi/host/pm.c:989 [ 784.906633] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 0, name: swapper/3 [ 784.906637] preempt_count: 503, expected: 0 [ 784.906641] RCU nest depth: 0, expected: 0 [ 784.906644] 2 locks held by swapper/3/0: [ 784.906646] #0: ffff8ed348e429e0 (&ab->ce.ce_lock){+.-.}-{2:2}, at: ath12k_ce_recv_process_cb+0xb3/0x2f0 [ath12k] [ 784.906664] #1: ffff8ed348e491f0 (&srng->lock_key#3){+.-.}-{2:2}, at: ath12k_ce_recv_process_cb+0xfb/0x2f0 [ath12k] [ 784.906678] Preemption disabled at: [ 784.906680] [<0000000000000000>] 0x0 [ 784.906686] CPU: 3 PID: 0 Comm: swapper/3 Tainted: G W O 6.1.0-rc2+ #3 [ 784.906688] Hardware name: Intel(R) Client Systems NUC8i7HVK/NUC8i7HVB, BIOS HNKBLi70.86A.0056.2019.0506.1527 05/06/2019 [ 784.906690] Call Trace: [ 784.906691] [ 784.906693] dump_stack_lvl+0x56/0x7b [ 784.906698] __might_resched+0x21c/0x270 [ 784.906704] __mhi_device_get_sync+0x7d/0x1c0 [mhi] [ 784.906714] mhi_device_get_sync+0xd/0x20 [mhi] [ 784.906719] ath12k_pci_write32+0x75/0x170 [ath12k] [ 784.906729] ath12k_hal_srng_access_end+0x55/0xc0 [ath12k] [ 784.906737] ath12k_ce_recv_process_cb+0x1f3/0x2f0 [ath12k] [ 784.906776] ? ath12k_pci_ce_tasklet+0x11/0x30 [ath12k] [ 784.906788] ath12k_pci_ce_tasklet+0x11/0x30 [ath12k] [ 784.906813] tasklet_action_common.isra.18+0xb7/0xe0 [ 784.906820] __do_softirq+0xd0/0x4c9 [ 784.906826] irq_exit_rcu+0x88/0xe0 [ 784.906828] common_interrupt+0xa5/0xc0 [ 784.906831] [ 784.906832] Adding function callbacks for MHI wakeup and release operations. QCN9274 does not need wakeup/release, function callbacks are initialized to NULL. In case of WCN7850, shadow registers are used to access rings. Since, shadow register's offset is less than ACCESS_ALWAYS_OFF, mhi_device_get_sync() or mhi_device_put() to wakeup and release mhi will not be called during service ring accesses. 
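A minimal, stand-alone sketch of the optional-callback pattern described above, where a NULL hook means the bus needs no wakeup/release around register access. All names are invented for illustration and the example compiles as ordinary user-space C:

#include <stdio.h>
#include <stddef.h>

struct chip_ops {
        int  (*wakeup)(void);   /* NULL when no wakeup is required */
        void (*release)(void);  /* NULL when no release is required */
};

static int demo_wakeup(void)
{
        puts("wake up bus");
        return 0;
}

static void demo_release(void)
{
        puts("release bus");
}

static const struct chip_ops ops_no_mhi = {     /* QCN9274-like case */
        .wakeup = NULL,
        .release = NULL,
};

static const struct chip_ops ops_with_mhi = {   /* WCN7850-like case */
        .wakeup = demo_wakeup,
        .release = demo_release,
};

static void access_register(const struct chip_ops *ops)
{
        int ret = 0;

        if (ops->wakeup)
                ret = ops->wakeup();

        puts("register access");

        /* release only when a wakeup hook exists and it succeeded */
        if (ops->release && !ret)
                ops->release();
}

int main(void)
{
        access_register(&ops_no_mhi);
        access_register(&ops_with_mhi);
        return 0;
}

This mirrors the design choice in the patch: the hot read/write paths test the hooks rather than testing the chip type.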
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.0-03171-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0-03427-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1.15378.4 Signed-off-by: Ramya Gnanasekar Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230123095141.5310-1-quic_rgnanase@quicinc.com --- drivers/net/wireless/ath/ath12k/pci.c | 47 ++++++++++++++++++++++----- drivers/net/wireless/ath/ath12k/pci.h | 6 ++++ 2 files changed, 44 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/pci.c b/drivers/net/wireless/ath/ath12k/pci.c index ae7f6083c9fc2..d32637b0113db 100644 --- a/drivers/net/wireless/ath/ath12k/pci.c +++ b/drivers/net/wireless/ath/ath12k/pci.c @@ -119,6 +119,30 @@ static const char *irq_name[ATH12K_IRQ_NUM_MAX] = { "tcl2host-status-ring", }; +static int ath12k_pci_bus_wake_up(struct ath12k_base *ab) +{ + struct ath12k_pci *ab_pci = ath12k_pci_priv(ab); + + return mhi_device_get_sync(ab_pci->mhi_ctrl->mhi_dev); +} + +static void ath12k_pci_bus_release(struct ath12k_base *ab) +{ + struct ath12k_pci *ab_pci = ath12k_pci_priv(ab); + + mhi_device_put(ab_pci->mhi_ctrl->mhi_dev); +} + +static const struct ath12k_pci_ops ath12k_pci_ops_qcn9274 = { + .wakeup = NULL, + .release = NULL, +}; + +static const struct ath12k_pci_ops ath12k_pci_ops_wcn7850 = { + .wakeup = ath12k_pci_bus_wake_up, + .release = ath12k_pci_bus_release, +}; + static void ath12k_pci_select_window(struct ath12k_pci *ab_pci, u32 offset) { struct ath12k_base *ab = ab_pci->ab; @@ -989,13 +1013,14 @@ u32 ath12k_pci_read32(struct ath12k_base *ab, u32 offset) { struct ath12k_pci *ab_pci = ath12k_pci_priv(ab); u32 val, window_start; + int ret = 0; /* for offset beyond BAR + 4K - 32, may * need to wakeup MHI to access. */ if (test_bit(ATH12K_PCI_FLAG_INIT_DONE, &ab_pci->flags) && - offset >= ACCESS_ALWAYS_OFF) - mhi_device_get_sync(ab_pci->mhi_ctrl->mhi_dev); + offset >= ACCESS_ALWAYS_OFF && ab_pci->pci_ops->wakeup) + ret = ab_pci->pci_ops->wakeup(ab); if (offset < WINDOW_START) { val = ioread32(ab->mem + offset); @@ -1023,9 +1048,9 @@ u32 ath12k_pci_read32(struct ath12k_base *ab, u32 offset) } if (test_bit(ATH12K_PCI_FLAG_INIT_DONE, &ab_pci->flags) && - offset >= ACCESS_ALWAYS_OFF) - mhi_device_put(ab_pci->mhi_ctrl->mhi_dev); - + offset >= ACCESS_ALWAYS_OFF && ab_pci->pci_ops->release && + !ret) + ab_pci->pci_ops->release(ab); return val; } @@ -1033,13 +1058,14 @@ void ath12k_pci_write32(struct ath12k_base *ab, u32 offset, u32 value) { struct ath12k_pci *ab_pci = ath12k_pci_priv(ab); u32 window_start; + int ret = 0; /* for offset beyond BAR + 4K - 32, may * need to wakeup MHI to access. 
*/ if (test_bit(ATH12K_PCI_FLAG_INIT_DONE, &ab_pci->flags) && - offset >= ACCESS_ALWAYS_OFF) - mhi_device_get_sync(ab_pci->mhi_ctrl->mhi_dev); + offset >= ACCESS_ALWAYS_OFF && ab_pci->pci_ops->wakeup) + ret = ab_pci->pci_ops->wakeup(ab); if (offset < WINDOW_START) { iowrite32(value, ab->mem + offset); @@ -1067,8 +1093,9 @@ void ath12k_pci_write32(struct ath12k_base *ab, u32 offset, u32 value) } if (test_bit(ATH12K_PCI_FLAG_INIT_DONE, &ab_pci->flags) && - offset >= ACCESS_ALWAYS_OFF) - mhi_device_put(ab_pci->mhi_ctrl->mhi_dev); + offset >= ACCESS_ALWAYS_OFF && ab_pci->pci_ops->release && + !ret) + ab_pci->pci_ops->release(ab); } int ath12k_pci_power_up(struct ath12k_base *ab) @@ -1182,6 +1209,7 @@ static int ath12k_pci_probe(struct pci_dev *pdev, case QCN9274_DEVICE_ID: ab_pci->msi_config = &ath12k_msi_config[0]; ab->static_window_map = true; + ab_pci->pci_ops = &ath12k_pci_ops_qcn9274; ath12k_pci_read_hw_version(ab, &soc_hw_version_major, &soc_hw_version_minor); switch (soc_hw_version_major) { @@ -1202,6 +1230,7 @@ static int ath12k_pci_probe(struct pci_dev *pdev, ab_pci->msi_config = &ath12k_msi_config[0]; ab->static_window_map = false; ab->hw_rev = ATH12K_HW_WCN7850_HW20; + ab_pci->pci_ops = &ath12k_pci_ops_wcn7850; break; default: diff --git a/drivers/net/wireless/ath/ath12k/pci.h b/drivers/net/wireless/ath/ath12k/pci.h index 0d9e40ab31f26..0f24fd9395cd9 100644 --- a/drivers/net/wireless/ath/ath12k/pci.h +++ b/drivers/net/wireless/ath/ath12k/pci.h @@ -86,6 +86,11 @@ enum ath12k_pci_flags { ATH12K_PCI_ASPM_RESTORE, }; +struct ath12k_pci_ops { + int (*wakeup)(struct ath12k_base *ab); + void (*release)(struct ath12k_base *ab); +}; + struct ath12k_pci { struct pci_dev *pdev; struct ath12k_base *ab; @@ -103,6 +108,7 @@ struct ath12k_pci { /* enum ath12k_pci_flags */ unsigned long flags; u16 link_ctl; + const struct ath12k_pci_ops *pci_ops; }; static inline struct ath12k_pci *ath12k_pci_priv(struct ath12k_base *ab) -- GitLab From f117276638b7600b981b3fe28550823cfbe1ef23 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 1 Feb 2023 08:54:42 -0800 Subject: [PATCH 0013/2078] wifi: ath11k: Use platform_get_irq() to get the interrupt As of commit a1a2b7125e10 ("of/platform: Drop static setup of IRQ resource from DT core"), we need to use platform_get_irq() instead of platform_get_resource() to get our IRQs because platform_get_resource() simply won't get them anymore. This was already fixed in several other Atheros WiFi drivers, apparently in response to Zeal Robot reports. An example of another fix is commit 9503a1fc123d ("ath9k: Use platform_get_irq() to get the interrupt"). ath11k seems to have been missed in this effort, though. Without this change, WiFi wasn't coming up on my Qualcomm sc7280-based hardware. Specifically, "platform_get_resource(pdev, IORESOURCE_IRQ, i)" was failing even for i=0. 
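A hedged sketch of the replacement pattern (the probe function name below is hypothetical): platform_get_irq() returns the IRQ number or a negative errno, so the result is checked directly instead of going through a platform_get_resource(IORESOURCE_IRQ, ...) lookup that no longer produces a resource.

#include <linux/platform_device.h>

static int example_probe(struct platform_device *pdev)
{
        int irq;

        irq = platform_get_irq(pdev, 0);
        if (irq < 0)
                return irq;     /* also propagates -EPROBE_DEFER */

        /* ... request the IRQ and continue with probe ... */
        return 0;
}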
Tested-on: WCN6750 hw1.0 AHB WLAN.MSL.1.0.1-00887-QCAMSLSWPLZ-1 Fixes: a1a2b7125e10 ("of/platform: Drop static setup of IRQ resource from DT core") Fixes: 00402f49d26f ("ath11k: Add support for WCN6750 device") Signed-off-by: Douglas Anderson Tested-by: Jun Yu Reviewed-by: Lad Prabhakar Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230201084131.v2.1.I69cf3d56c97098287fe3a70084ee515098390b70@changeid --- drivers/net/wireless/ath/ath11k/ahb.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/ahb.c b/drivers/net/wireless/ath/ath11k/ahb.c index 022caacd28220..6b6059d5dc385 100644 --- a/drivers/net/wireless/ath/ath11k/ahb.c +++ b/drivers/net/wireless/ath/ath11k/ahb.c @@ -874,11 +874,11 @@ static int ath11k_ahb_setup_msi_resources(struct ath11k_base *ab) ab->pci.msi.ep_base_data = int_prop + 32; for (i = 0; i < ab->pci.msi.config->total_vectors; i++) { - res = platform_get_resource(pdev, IORESOURCE_IRQ, i); - if (!res) - return -ENODEV; + ret = platform_get_irq(pdev, i); + if (ret < 0) + return ret; - ab->pci.msi.irqs[i] = res->start; + ab->pci.msi.irqs[i] = ret; } set_bit(ATH11K_FLAG_MULTI_MSI_VECTORS, &ab->dev_flags); -- GitLab From 95c95251d0547b46d6571e4fbd51b42865c15a4a Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 1 Feb 2023 08:54:43 -0800 Subject: [PATCH 0014/2078] wifi: ath5k: Use platform_get_irq() to get the interrupt As of commit a1a2b7125e10 ("of/platform: Drop static setup of IRQ resource from DT core"), we need to use platform_get_irq() instead of platform_get_resource() to get our IRQs because platform_get_resource() simply won't get them anymore. This was already fixed in several other Atheros WiFi drivers, apparently in response to Zeal Robot reports. An example of another fix is commit 9503a1fc123d ("ath9k: Use platform_get_irq() to get the interrupt"). ath5k seems to have been missed in this effort, though. Fixes: a1a2b7125e10 ("of/platform: Drop static setup of IRQ resource from DT core") Signed-off-by: Douglas Anderson Reviewed-by: Lad Prabhakar Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230201084131.v2.2.Ic4f8542b0588d7eb4bc6e322d4af3d2064e84ff0@changeid --- drivers/net/wireless/ath/ath5k/ahb.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/ath/ath5k/ahb.c b/drivers/net/wireless/ath/ath5k/ahb.c index 2c9cec8b53d9e..28a1e5eff204e 100644 --- a/drivers/net/wireless/ath/ath5k/ahb.c +++ b/drivers/net/wireless/ath/ath5k/ahb.c @@ -113,15 +113,13 @@ static int ath_ahb_probe(struct platform_device *pdev) goto err_out; } - res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); - if (res == NULL) { - dev_err(&pdev->dev, "no IRQ resource found\n"); - ret = -ENXIO; + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(&pdev->dev, "no IRQ resource found: %d\n", irq); + ret = irq; goto err_iounmap; } - irq = res->start; - hw = ieee80211_alloc_hw(sizeof(struct ath5k_hw), &ath5k_hw_ops); if (hw == NULL) { dev_err(&pdev->dev, "no memory for ieee80211_hw\n"); -- GitLab From 4c856ee12df85aabd437c3836ed9f68d94268358 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 6 Feb 2023 16:15:48 +0300 Subject: [PATCH 0015/2078] wifi: ath5k: fix an off by one check in ath5k_eeprom_read_freq_list() This loop checks that i < max at the start of loop but then it does i++ which could put it past the end of the array. It's harmless to check again and prevent a potential out of bounds. 
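The pattern can be reproduced in a few lines of plain C (array and variable names invented for the example): because the index is incremented inside the loop body, it has to be re-checked before the second write of each pair, even though the for-condition already tested it at the top.

#include <stdio.h>

#define MAX_PIERS 3

int main(void)
{
        int freq[MAX_PIERS] = { 0 };
        int i = 0;

        for (int val = 10; i < MAX_PIERS; val += 10) {
                freq[i++] = val;                /* first entry of the pair */

                int second = val + 5;
                if (!second || i >= MAX_PIERS)  /* re-check before the next write */
                        break;

                freq[i++] = second;             /* second entry of the pair */
        }

        for (int k = 0; k < MAX_PIERS; k++)
                printf("freq[%d] = %d\n", k, freq[k]);

        return 0;
}

Dropping the `i >= MAX_PIERS` test would make the second write of the last iteration land one element past the end of the array, which is the off-by-one the patch guards against.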
Fixes: 1048643ea94d ("ath5k: Clean up eeprom parsing and add missing calibration data") Signed-off-by: Dan Carpenter Reviewed-by: Luis Chamberlain Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/Y+D9hPQrHfWBJhXz@kili --- drivers/net/wireless/ath/ath5k/eeprom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath5k/eeprom.c b/drivers/net/wireless/ath/ath5k/eeprom.c index d444b3d70ba2e..58d3e86f6256d 100644 --- a/drivers/net/wireless/ath/ath5k/eeprom.c +++ b/drivers/net/wireless/ath/ath5k/eeprom.c @@ -529,7 +529,7 @@ ath5k_eeprom_read_freq_list(struct ath5k_hw *ah, int *offset, int max, ee->ee_n_piers[mode]++; freq2 = (val >> 8) & 0xff; - if (!freq2) + if (!freq2 || i >= max) break; pc[i++].freq = ath5k_eeprom_bin2freq(ee, -- GitLab From 8c68fe00344c01bfba95b9f978af0aa236a821f7 Mon Sep 17 00:00:00 2001 From: Abhishek Kumar Date: Fri, 3 Feb 2023 00:01:40 +0000 Subject: [PATCH 0016/2078] wifi: ath10k: snoc: enable threaded napi on WCN3990 NAPI poll can be done in threaded context along with soft irq context. Threaded context can be scheduled efficiently, thus creating less of bottleneck during Rx processing. This patch is to enable threaded NAPI on ath10k driver. Based on testing, it was observed that on WCN3990, the CPU0 reaches 100% utilization when napi runs in softirq context. At the same time the other CPUs are at low consumption percentage. This does not allow device to reach its maximum throughput potential. After enabling threaded napi, CPU load is balanced across all CPUs and following improvments were observed: - UDP_RX increase by ~22-25% - TCP_RX increase by ~15% Here are some of the additional raw data with and without threaded napi: ================================================== udp_rx(Without threaded NAPI) 435.98+-5.16 : Channel 44 439.06+-0.66 : Channel 157 udp_rx(With threaded NAPI) 509.73+-41.03 : Channel 44 549.97+-7.62 : Channel 157 =================================================== udp_tx(Without threaded NAPI) 461.31+-0.69 : Channel 44 461.46+-0.78 : Channel 157 udp_tx(With threaded NAPI) 459.20+-0.77 : Channel 44 459.78+-1.08 : Channel 157 =================================================== tcp_rx(Without threaded NAPI) 472.63+-2.35 : Channel 44 469.29+-6.31 : Channel 157 tcp_rx(With threaded NAPI) 498.49+-2.44 : Channel 44 541.14+-40.65 : Channel 157 =================================================== tcp_tx(Without threaded NAPI) 317.34+-2.37 : Channel 44 317.01+-2.56 : Channel 157 tcp_tx(With threaded NAPI) 371.34+-2.36 : Channel 44 376.95+-9.40 : Channel 157 =================================================== Tested-on: WCN3990 hw1.0 SNOC WLAN.HL.3.2.2-00696-QCAHLSWMTPL-1 Signed-off-by: Abhishek Kumar Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230203000116.v2.1.I5bb9c164a2d2025655dee810b983e01ecd81c14e@changeid --- drivers/net/wireless/ath/ath10k/snoc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ath/ath10k/snoc.c b/drivers/net/wireless/ath/ath10k/snoc.c index cfcb759a87dea..0f6d2f67ff6b4 100644 --- a/drivers/net/wireless/ath/ath10k/snoc.c +++ b/drivers/net/wireless/ath/ath10k/snoc.c @@ -927,6 +927,7 @@ static int ath10k_snoc_hif_start(struct ath10k *ar) bitmap_clear(ar_snoc->pending_ce_irqs, 0, CE_COUNT_MAX); + dev_set_threaded(&ar->napi_dev, true); ath10k_core_napi_enable(ar); ath10k_snoc_irq_enable(ar); ath10k_snoc_rx_post(ar); -- GitLab From 7c4c511f74ba65a18f0beeb9b2fc0e34f28ee79d Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Tue, 7 Feb 2023 13:24:10 +0800 
Subject: [PATCH 0017/2078] wifi: ath10k: Remove redundant assignment to changed_flags Variable changed_flags is assigned, but is not effectively used, so delete it. drivers/net/wireless/ath/ath10k/mac.c:6024:22: warning: parameter 'changed_flags' set but not used. Reported-by: Abaci Robot Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=3963 Signed-off-by: Jiapeng Chong Reviewed-by: Simon Horman Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230207052410.26337-1-jiapeng.chong@linux.alibaba.com --- drivers/net/wireless/ath/ath10k/mac.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index ec8d5b29bc72c..7675858f069bd 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -6030,7 +6030,6 @@ static void ath10k_configure_filter(struct ieee80211_hw *hw, mutex_lock(&ar->conf_mutex); - changed_flags &= SUPPORTED_FILTERS; *total_flags &= SUPPORTED_FILTERS; ar->filter_flags = *total_flags; -- GitLab From 60b7d62ba8cdbd073997bff0f1cdae8d844002c0 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Thu, 9 Feb 2023 23:26:22 +0100 Subject: [PATCH 0018/2078] wifi: ath11k: fix SAC bug on peer addition with sta band migration Fix sleep in atomic context warning detected by Smatch static checker analyzer. Following the locking pattern for peer_rhash_add lock tbl_mtx_lock mutex always even if sta is not transitioning to another band. This is peer_add function and a more secure locking should not cause performance regression. Tested-on: IPQ8074 hw2.0 AHB WLAN.HK.2.5.0.1-01208-QCAHKSWPL_SILICONZ-1 Fixes: d673cb6fe6c0 ("wifi: ath11k: fix peer addition/deletion error on sta band migration") Reported-by: Dan Carpenter Signed-off-by: Christian Marangi Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230209222622.1751-1-ansuelsmth@gmail.com --- drivers/net/wireless/ath/ath11k/peer.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/peer.c b/drivers/net/wireless/ath/ath11k/peer.c index 1ae7af02c364e..1380811827a84 100644 --- a/drivers/net/wireless/ath/ath11k/peer.c +++ b/drivers/net/wireless/ath/ath11k/peer.c @@ -382,22 +382,23 @@ int ath11k_peer_create(struct ath11k *ar, struct ath11k_vif *arvif, return -ENOBUFS; } + mutex_lock(&ar->ab->tbl_mtx_lock); spin_lock_bh(&ar->ab->base_lock); peer = ath11k_peer_find_by_addr(ar->ab, param->peer_addr); if (peer) { if (peer->vdev_id == param->vdev_id) { spin_unlock_bh(&ar->ab->base_lock); + mutex_unlock(&ar->ab->tbl_mtx_lock); return -EINVAL; } /* Assume sta is transitioning to another band. * Remove here the peer from rhash. */ - mutex_lock(&ar->ab->tbl_mtx_lock); ath11k_peer_rhash_delete(ar->ab, peer); - mutex_unlock(&ar->ab->tbl_mtx_lock); } spin_unlock_bh(&ar->ab->base_lock); + mutex_unlock(&ar->ab->tbl_mtx_lock); ret = ath11k_wmi_send_peer_create_cmd(ar, param); if (ret) { -- GitLab From 59e6ded57cc1b3b5117cef66dabe6d7977208ba0 Mon Sep 17 00:00:00 2001 From: Lu jicong Date: Tue, 14 Feb 2023 06:36:02 +0000 Subject: [PATCH 0019/2078] wifi: rtlwifi: rtl8192ce: fix dealing empty EEPROM values On OpenWRT platform, RTL8192CE could be soldered on board, but not standard PCI module. In this case, some EEPROM values aren't programmed and left 0xff. Load default values when the EEPROM values are empty to avoid problems. 
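The idea reduces to a small helper (the names and the default value below are invented for illustration; the real patch checks several related bytes together): a byte that still reads back as 0xff was never programmed, so the driver substitutes a sane default instead of using it.

#include <stdio.h>
#include <stdint.h>

#define EXAMPLE_DEFAULT_THERMALMETER 0x12       /* illustrative default */

static uint8_t eeprom_byte_or_default(uint8_t raw, uint8_t def)
{
        /* 0xff means the EEPROM cell was left unprogrammed */
        return (raw != 0xff) ? raw : def;
}

int main(void)
{
        printf("programmed -> 0x%02x\n",
               eeprom_byte_or_default(0x1a, EXAMPLE_DEFAULT_THERMALMETER));
        printf("blank      -> 0x%02x\n",
               eeprom_byte_or_default(0xff, EXAMPLE_DEFAULT_THERMALMETER));
        return 0;
}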
Signed-off-by: Lu jicong Acked-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230214063602.2257263-1-jiconglu58@gmail.com --- .../wireless/realtek/rtlwifi/rtl8192ce/hw.c | 25 +++++++++++++------ 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c index b9c62640d2cbd..dc480323c9cbf 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c @@ -1428,7 +1428,9 @@ static void _rtl92ce_read_txpower_info_from_hwpg(struct ieee80211_hw *hw, for (rf_path = 0; rf_path < 2; rf_path++) { for (i = 0; i < 3; i++) { - if (!autoload_fail) { + if (!autoload_fail && + hwinfo[EEPROM_TXPOWERCCK + rf_path * 3 + i] != 0xff && + hwinfo[EEPROM_TXPOWERHT40_1S + rf_path * 3 + i] != 0xff) { rtlefuse-> eeprom_chnlarea_txpwr_cck[rf_path][i] = hwinfo[EEPROM_TXPOWERCCK + rf_path * 3 + i]; @@ -1448,7 +1450,8 @@ static void _rtl92ce_read_txpower_info_from_hwpg(struct ieee80211_hw *hw, } for (i = 0; i < 3; i++) { - if (!autoload_fail) + if (!autoload_fail && + hwinfo[EEPROM_TXPOWERHT40_2SDIFF + i] != 0xff) tempval = hwinfo[EEPROM_TXPOWERHT40_2SDIFF + i]; else tempval = EEPROM_DEFAULT_HT40_2SDIFF; @@ -1518,7 +1521,9 @@ static void _rtl92ce_read_txpower_info_from_hwpg(struct ieee80211_hw *hw, } for (i = 0; i < 3; i++) { - if (!autoload_fail) { + if (!autoload_fail && + hwinfo[EEPROM_TXPWR_GROUP + i] != 0xff && + hwinfo[EEPROM_TXPWR_GROUP + 3 + i] != 0xff) { rtlefuse->eeprom_pwrlimit_ht40[i] = hwinfo[EEPROM_TXPWR_GROUP + i]; rtlefuse->eeprom_pwrlimit_ht20[i] = @@ -1563,7 +1568,8 @@ static void _rtl92ce_read_txpower_info_from_hwpg(struct ieee80211_hw *hw, for (i = 0; i < 14; i++) { index = rtl92c_get_chnl_group((u8)i); - if (!autoload_fail) + if (!autoload_fail && + hwinfo[EEPROM_TXPOWERHT20DIFF + index] != 0xff) tempval = hwinfo[EEPROM_TXPOWERHT20DIFF + index]; else tempval = EEPROM_DEFAULT_HT20_DIFF; @@ -1580,7 +1586,8 @@ static void _rtl92ce_read_txpower_info_from_hwpg(struct ieee80211_hw *hw, index = rtl92c_get_chnl_group((u8)i); - if (!autoload_fail) + if (!autoload_fail && + hwinfo[EEPROM_TXPOWER_OFDMDIFF + index] != 0xff) tempval = hwinfo[EEPROM_TXPOWER_OFDMDIFF + index]; else tempval = EEPROM_DEFAULT_LEGACYHTTXPOWERDIFF; @@ -1610,14 +1617,16 @@ static void _rtl92ce_read_txpower_info_from_hwpg(struct ieee80211_hw *hw, "RF-B Legacy to HT40 Diff[%d] = 0x%x\n", i, rtlefuse->txpwr_legacyhtdiff[RF90_PATH_B][i]); - if (!autoload_fail) + if (!autoload_fail && hwinfo[RF_OPTION1] != 0xff) rtlefuse->eeprom_regulatory = (hwinfo[RF_OPTION1] & 0x7); else rtlefuse->eeprom_regulatory = 0; RTPRINT(rtlpriv, FINIT, INIT_TXPOWER, "eeprom_regulatory = 0x%x\n", rtlefuse->eeprom_regulatory); - if (!autoload_fail) { + if (!autoload_fail && + hwinfo[EEPROM_TSSI_A] != 0xff && + hwinfo[EEPROM_TSSI_B] != 0xff) { rtlefuse->eeprom_tssi[RF90_PATH_A] = hwinfo[EEPROM_TSSI_A]; rtlefuse->eeprom_tssi[RF90_PATH_B] = hwinfo[EEPROM_TSSI_B]; } else { @@ -1628,7 +1637,7 @@ static void _rtl92ce_read_txpower_info_from_hwpg(struct ieee80211_hw *hw, rtlefuse->eeprom_tssi[RF90_PATH_A], rtlefuse->eeprom_tssi[RF90_PATH_B]); - if (!autoload_fail) + if (!autoload_fail && hwinfo[EEPROM_THERMAL_METER] != 0xff) tempval = hwinfo[EEPROM_THERMAL_METER]; else tempval = EEPROM_DEFAULT_THERMALMETER; -- GitLab From 015bf4df8ea67080668ac8314e692076dea1ce6d Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 15 Feb 2023 13:34:37 +0100 Subject: [PATCH 0020/2078] 
wifi: wfx: Remove some dead code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit wait_for_completion_timeout() can not return a <0 value. So simplify the logic and remove dead code. -ERESTARTSYS can not be returned by do_wait_for_common() for tasks with TASK_UNINTERRUPTIBLE, which is the case for wait_for_completion_timeout() Signed-off-by: Christophe JAILLET Reviewed-by: Jérôme Pouiller Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/809c4a645c8d1306c0d256345515865c40ec731c.1676464422.git.christophe.jaillet@wanadoo.fr --- drivers/net/wireless/silabs/wfx/main.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/silabs/wfx/main.c b/drivers/net/wireless/silabs/wfx/main.c index 6b9864e478ac8..0b50f7058bbbc 100644 --- a/drivers/net/wireless/silabs/wfx/main.c +++ b/drivers/net/wireless/silabs/wfx/main.c @@ -358,13 +358,9 @@ int wfx_probe(struct wfx_dev *wdev) wfx_bh_poll_irq(wdev); err = wait_for_completion_timeout(&wdev->firmware_ready, 1 * HZ); - if (err <= 0) { - if (err == 0) { - dev_err(wdev->dev, "timeout while waiting for startup indication\n"); - err = -ETIMEDOUT; - } else if (err == -ERESTARTSYS) { - dev_info(wdev->dev, "probe interrupted by user\n"); - } + if (err == 0) { + dev_err(wdev->dev, "timeout while waiting for startup indication\n"); + err = -ETIMEDOUT; goto bh_unregister; } -- GitLab From 398eb19415ebb01ccc8e5bb75ff04cb760b01e05 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Thu, 16 Feb 2023 08:46:54 +0800 Subject: [PATCH 0021/2078] wifi: rtl8xxxu: 8188e: parse single one element of RA report for station mode Intentionally parsing single one element of RA report by breaking loop causes a smatch warning: drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8188e.c:1678 rtl8188e_handle_ra_tx_report2() warn: ignoring unreachable code. With existing comments, it intends to process single one element for station mode, but it will parse more elements in AP mode if it's implemented. Implement program logic according to existing comment to avoid smatch warning, and also be usable for both AP and stations modes. Compile test only. Reported-by: kernel test robot Reported-by: Dan Carpenter Link: https://lore.kernel.org/r/202302142135.LCqUTVGY-lkp@intel.com/ Cc: Bitterblue Smith Signed-off-by: Ping-Ke Shih Tested-by: Bitterblue Smith Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230216004654.4642-1-pkshih@realtek.com --- .../net/wireless/realtek/rtl8xxxu/rtl8xxxu_8188e.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8188e.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8188e.c index a99ddb41cd244..f15b099899e5c 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8188e.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8188e.c @@ -1699,6 +1699,12 @@ void rtl8188e_handle_ra_tx_report2(struct rtl8xxxu_priv *priv, struct sk_buff *s dev_dbg(dev, "%s: len: %d items: %d\n", __func__, tx_rpt_len, items); + /* We only use macid 0, so only the first item is relevant. + * AP mode will use more of them if it's ever implemented. + */ + if (!priv->vif || priv->vif->type == NL80211_IFTYPE_STATION) + items = 1; + for (macid = 0; macid < items; macid++) { valid = false; @@ -1741,12 +1747,6 @@ void rtl8188e_handle_ra_tx_report2(struct rtl8xxxu_priv *priv, struct sk_buff *s min_rpt_time = ra->rpt_time; rpt += TX_RPT2_ITEM_SIZE; - - /* - * We only use macid 0, so only the first item is relevant. 
- * AP mode will use more of them if it's ever implemented. - */ - break; } if (min_rpt_time != ra->pre_min_rpt_time) { -- GitLab From db5e4b3645534bc8f8d6c24537e9564a62073ac8 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Fri, 17 Feb 2023 17:25:29 +0800 Subject: [PATCH 0022/2078] wifi: rtlwifi: rtl8192de: Remove the unused variable bcnfunc_enable The variable bcnfunc_enable is assigned but never used, so delete it. drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c:1050:5: warning: variable 'bcnfunc_enable' set but not used. Reported-by: Abaci Robot Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=4110 Signed-off-by: Jiapeng Chong Acked-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230217092529.105899-1-jiapeng.chong@linux.alibaba.com --- drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c index 2aecb2583f75e..df1e36fbc348a 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c @@ -1047,7 +1047,6 @@ static int _rtl92de_set_media_status(struct ieee80211_hw *hw, struct rtl_priv *rtlpriv = rtl_priv(hw); u8 bt_msr = rtl_read_byte(rtlpriv, MSR); enum led_ctl_mode ledaction = LED_CTL_NO_LINK; - u8 bcnfunc_enable; bt_msr &= 0xfc; @@ -1064,31 +1063,26 @@ static int _rtl92de_set_media_status(struct ieee80211_hw *hw, "Set HW_VAR_MEDIA_STATUS: No such media status(%x)\n", type); } - bcnfunc_enable = rtl_read_byte(rtlpriv, REG_BCN_CTRL); switch (type) { case NL80211_IFTYPE_UNSPECIFIED: bt_msr |= MSR_NOLINK; ledaction = LED_CTL_LINK; - bcnfunc_enable &= 0xF7; rtl_dbg(rtlpriv, COMP_INIT, DBG_TRACE, "Set Network type to NO LINK!\n"); break; case NL80211_IFTYPE_ADHOC: bt_msr |= MSR_ADHOC; - bcnfunc_enable |= 0x08; rtl_dbg(rtlpriv, COMP_INIT, DBG_TRACE, "Set Network type to Ad Hoc!\n"); break; case NL80211_IFTYPE_STATION: bt_msr |= MSR_INFRA; ledaction = LED_CTL_LINK; - bcnfunc_enable &= 0xF7; rtl_dbg(rtlpriv, COMP_INIT, DBG_TRACE, "Set Network type to STA!\n"); break; case NL80211_IFTYPE_AP: bt_msr |= MSR_AP; - bcnfunc_enable |= 0x08; rtl_dbg(rtlpriv, COMP_INIT, DBG_TRACE, "Set Network type to AP!\n"); break; -- GitLab From c6aa9a9c47252ac7b07ed6d10459027e2f2a2de0 Mon Sep 17 00:00:00 2001 From: Po-Hao Huang Date: Fri, 17 Feb 2023 20:00:07 +0800 Subject: [PATCH 0023/2078] wifi: rtw89: add RNR support for 6 GHz scan Since the 6 GHz band has around 60 channels and stricter rules for active probing, a reduced neighbor report (RNR) can be used to narrow down the channels we scan and to obtain specific target BSS info to probe for. Declare the flag WIPHY_FLAG_SPLIT_SCAN_6GHZ so the scan request can be divided into two portions, legacy bands and the 6 GHz band, and RNR information gathered on the legacy bands can later be used for the 6 GHz scan. When the scan flag NL80211_SCAN_FLAG_COLOCATED_6GHZ is set, cfg80211 passes down a reduced channel set which contains the PSC channels plus the non-PSC channels for which RNR info was received on the 2 GHz/5 GHz bands. This reduces the scan duration by allowing us to scan only channels on which APs are currently operating.
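In driver terms the resulting policy is roughly the sketch below: a 6 GHz channel is worth actively probing either because it is a PSC channel or because cfg80211 handed down an RNR-derived entry for it. This only illustrates the cfg80211 data involved; it is not the actual rtw89 code:

	#include <net/cfg80211.h>

	/* sketch: should we send probe requests on this 6 GHz channel? */
	static bool probe_this_6ghz_chan(struct cfg80211_scan_request *req,
					 struct ieee80211_channel *chan)
	{
		u32 i;

		if (cfg80211_channel_is_psc(chan))
			return true;

		/* non-PSC: only if an RNR entry from 2/5 GHz points at it */
		for (i = 0; i < req->n_6ghz_params; i++)
			if (req->channels[req->scan_6ghz_params[i].channel_idx] == chan)
				return true;

		return false;
	}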
Signed-off-by: Po-Hao Huang Signed-off-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230217120007.8835-1-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/core.c | 35 +++++- drivers/net/wireless/realtek/rtw89/fw.c | 136 +++++++++++++++++++--- drivers/net/wireless/realtek/rtw89/fw.h | 7 ++ 3 files changed, 162 insertions(+), 16 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/core.c b/drivers/net/wireless/realtek/rtw89/core.c index f09361bc4a4d1..489fa7a86160d 100644 --- a/drivers/net/wireless/realtek/rtw89/core.c +++ b/drivers/net/wireless/realtek/rtw89/core.c @@ -1400,6 +1400,34 @@ static void rtw89_stats_trigger_frame(struct rtw89_dev *rtwdev, } } +static void rtw89_core_cancel_6ghz_probe_tx(struct rtw89_dev *rtwdev, + struct sk_buff *skb) +{ + struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb); + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)skb->data; + struct list_head *pkt_list = rtwdev->scan_info.pkt_list; + struct rtw89_pktofld_info *info; + const u8 *ies = mgmt->u.beacon.variable, *ssid_ie; + + if (rx_status->band != NL80211_BAND_6GHZ) + return; + + ssid_ie = cfg80211_find_ie(WLAN_EID_SSID, ies, skb->len); + + list_for_each_entry(info, &pkt_list[NL80211_BAND_6GHZ], list) { + if (ether_addr_equal(info->bssid, mgmt->bssid)) { + rtw89_fw_h2c_del_pkt_offload(rtwdev, info->id); + continue; + } + + if (!ssid_ie || ssid_ie[1] != info->ssid_len || info->ssid_len == 0) + continue; + + if (memcmp(&ssid_ie[2], info->ssid, info->ssid_len) == 0) + rtw89_fw_h2c_del_pkt_offload(rtwdev, info->id); + } +} + static void rtw89_vif_rx_stats_iter(void *data, u8 *mac, struct ieee80211_vif *vif) { @@ -1412,6 +1440,11 @@ static void rtw89_vif_rx_stats_iter(void *data, u8 *mac, struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; const u8 *bssid = iter_data->bssid; + if (rtwdev->scanning && + (ieee80211_is_beacon(hdr->frame_control) || + ieee80211_is_probe_resp(hdr->frame_control))) + rtw89_core_cancel_6ghz_probe_tx(rtwdev, skb); + if (!vif->bss_conf.bssid) return; @@ -3372,7 +3405,7 @@ static int rtw89_core_register_hw(struct rtw89_dev *rtwdev) hw->wiphy->flags |= WIPHY_FLAG_SUPPORTS_TDLS | WIPHY_FLAG_TDLS_EXTERNAL_SETUP | - WIPHY_FLAG_AP_UAPSD; + WIPHY_FLAG_AP_UAPSD | WIPHY_FLAG_SPLIT_SCAN_6GHZ; hw->wiphy->features |= NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR; hw->wiphy->max_scan_ssids = RTW89_SCANOFLD_MAX_SSID; diff --git a/drivers/net/wireless/realtek/rtw89/fw.c b/drivers/net/wireless/realtek/rtw89/fw.c index 0b73dc2e9ad77..a88f7974c1d6b 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.c +++ b/drivers/net/wireless/realtek/rtw89/fw.c @@ -2702,9 +2702,29 @@ static void rtw89_release_pkt_list(struct rtw89_dev *rtwdev) } } +static bool rtw89_is_6ghz_wildcard_probe_req(struct rtw89_dev *rtwdev, + struct rtw89_vif *rtwvif, + struct rtw89_pktofld_info *info, + enum nl80211_band band, u8 ssid_idx) +{ + struct cfg80211_scan_request *req = rtwvif->scan_req; + + if (band != NL80211_BAND_6GHZ) + return false; + + if (req->ssids[ssid_idx].ssid_len) { + memcpy(info->ssid, req->ssids[ssid_idx].ssid, + req->ssids[ssid_idx].ssid_len); + info->ssid_len = req->ssids[ssid_idx].ssid_len; + return false; + } else { + return true; + } +} + static int rtw89_append_probe_req_ie(struct rtw89_dev *rtwdev, struct rtw89_vif *rtwvif, - struct sk_buff *skb) + struct sk_buff *skb, u8 ssid_idx) { struct rtw89_hw_scan_info *scan_info = &rtwdev->scan_info; struct ieee80211_scan_ies *ies = rtwvif->scan_ies; @@ -2732,6 +2752,13 @@ static int 
rtw89_append_probe_req_ie(struct rtw89_dev *rtwdev, goto out; } + if (rtw89_is_6ghz_wildcard_probe_req(rtwdev, rtwvif, info, band, + ssid_idx)) { + kfree_skb(new); + kfree(info); + goto out; + } + ret = rtw89_fw_h2c_add_pkt_offload(rtwdev, &info->id, new); if (ret) { kfree_skb(new); @@ -2762,7 +2789,7 @@ static int rtw89_hw_scan_update_probe_req(struct rtw89_dev *rtwdev, if (!skb) return -ENOMEM; - ret = rtw89_append_probe_req_ie(rtwdev, rtwvif, skb); + ret = rtw89_append_probe_req_ie(rtwdev, rtwvif, skb, i); kfree_skb(skb); if (ret) @@ -2772,6 +2799,77 @@ static int rtw89_hw_scan_update_probe_req(struct rtw89_dev *rtwdev, return 0; } +static int rtw89_update_6ghz_rnr_chan(struct rtw89_dev *rtwdev, + struct cfg80211_scan_request *req, + struct rtw89_mac_chinfo *ch_info) +{ + struct ieee80211_vif *vif = rtwdev->scan_info.scanning_vif; + struct list_head *pkt_list = rtwdev->scan_info.pkt_list; + struct rtw89_vif *rtwvif = vif_to_rtwvif_safe(vif); + struct ieee80211_scan_ies *ies = rtwvif->scan_ies; + struct cfg80211_scan_6ghz_params *params; + struct rtw89_pktofld_info *info, *tmp; + struct ieee80211_hdr *hdr; + struct sk_buff *skb; + bool found; + int ret = 0; + u8 i; + + if (!req->n_6ghz_params) + return 0; + + for (i = 0; i < req->n_6ghz_params; i++) { + params = &req->scan_6ghz_params[i]; + + if (req->channels[params->channel_idx]->hw_value != + ch_info->pri_ch) + continue; + + found = false; + list_for_each_entry(tmp, &pkt_list[NL80211_BAND_6GHZ], list) { + if (ether_addr_equal(tmp->bssid, params->bssid)) { + found = true; + break; + } + } + if (found) + continue; + + skb = ieee80211_probereq_get(rtwdev->hw, rtwvif->mac_addr, + NULL, 0, req->ie_len); + skb_put_data(skb, ies->ies[NL80211_BAND_6GHZ], ies->len[NL80211_BAND_6GHZ]); + skb_put_data(skb, ies->common_ies, ies->common_ie_len); + hdr = (struct ieee80211_hdr *)skb->data; + ether_addr_copy(hdr->addr3, params->bssid); + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + ret = -ENOMEM; + kfree_skb(skb); + goto out; + } + + ret = rtw89_fw_h2c_add_pkt_offload(rtwdev, &info->id, skb); + if (ret) { + kfree_skb(skb); + kfree(info); + goto out; + } + + ether_addr_copy(info->bssid, params->bssid); + info->channel_6ghz = req->channels[params->channel_idx]->hw_value; + list_add_tail(&info->list, &rtwdev->scan_info.pkt_list[NL80211_BAND_6GHZ]); + + ch_info->tx_pkt = true; + ch_info->period = RTW89_CHANNEL_TIME_6G + RTW89_DWELL_TIME_6G; + + kfree_skb(skb); + } + +out: + return ret; +} + static void rtw89_hw_scan_add_chan(struct rtw89_dev *rtwdev, int chan_type, int ssid_num, struct rtw89_mac_chinfo *ch_info) @@ -2782,6 +2880,7 @@ static void rtw89_hw_scan_add_chan(struct rtw89_dev *rtwdev, int chan_type, struct cfg80211_scan_request *req = rtwvif->scan_req; struct rtw89_pktofld_info *info; u8 band, probe_count = 0; + int ret; ch_info->notify_action = RTW89_SCANOFLD_DEBUG_MASK; ch_info->dfs_ch = chan_type == RTW89_CHAN_DFS; @@ -2793,25 +2892,31 @@ static void rtw89_hw_scan_add_chan(struct rtw89_dev *rtwdev, int chan_type, ch_info->pause_data = false; ch_info->probe_id = RTW89_SCANOFLD_PKT_NONE; + if (ch_info->ch_band == RTW89_BAND_6G) { + if ((ssid_num == 1 && req->ssids[0].ssid_len == 0) || + !ch_info->is_psc) { + ch_info->tx_pkt = false; + if (!req->duration_mandatory) + ch_info->period -= RTW89_DWELL_TIME_6G; + } + } + + ret = rtw89_update_6ghz_rnr_chan(rtwdev, req, ch_info); + if (ret) + rtw89_warn(rtwdev, "RNR fails: %d\n", ret); + if (ssid_num) { - ch_info->num_pkt = ssid_num; band = rtw89_hw_to_nl80211_band(ch_info->ch_band); 
list_for_each_entry(info, &scan_info->pkt_list[band], list) { - ch_info->pkt_id[probe_count] = info->id; - if (++probe_count >= ssid_num) + if (info->channel_6ghz && + ch_info->pri_ch != info->channel_6ghz) + continue; + ch_info->pkt_id[probe_count++] = info->id; + if (probe_count >= RTW89_SCANOFLD_MAX_SSID) break; } - if (probe_count != ssid_num) - rtw89_err(rtwdev, "SSID num differs from list len\n"); - } - - if (ch_info->ch_band == RTW89_BAND_6G) { - if (ssid_num == 1 && req->ssids[0].ssid_len == 0) { - ch_info->tx_pkt = false; - if (!req->duration_mandatory) - ch_info->period -= RTW89_DWELL_TIME_6G; - } + ch_info->num_pkt = probe_count; } switch (chan_type) { @@ -2872,6 +2977,7 @@ static int rtw89_hw_scan_add_chan_list(struct rtw89_dev *rtwdev, ch_info->central_ch = channel->hw_value; ch_info->pri_ch = channel->hw_value; ch_info->rand_seq_num = random_seq; + ch_info->is_psc = cfg80211_channel_is_psc(channel); if (channel->flags & (IEEE80211_CHAN_RADAR | IEEE80211_CHAN_NO_IR)) diff --git a/drivers/net/wireless/realtek/rtw89/fw.h b/drivers/net/wireless/realtek/rtw89/fw.h index cae07e325326d..3f6e0871381df 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.h +++ b/drivers/net/wireless/realtek/rtw89/fw.h @@ -237,6 +237,7 @@ struct rtw89_mac_chinfo { u16 tx_pwr_idx; u8 rsvd1; struct list_head list; + bool is_psc; }; struct rtw89_scan_option { @@ -247,6 +248,12 @@ struct rtw89_scan_option { struct rtw89_pktofld_info { struct list_head list; u8 id; + + /* Below fields are for 6 GHz RNR use only */ + u8 ssid[IEEE80211_MAX_SSID_LEN]; + u8 ssid_len; + u8 bssid[ETH_ALEN]; + u16 channel_6ghz; }; static inline void RTW89_SET_FWCMD_RA_IS_DIS(void *cmd, u32 val) -- GitLab From b9b1e4fe2957f361c86e288ecf373dc7895cf7c7 Mon Sep 17 00:00:00 2001 From: Bitterblue Smith Date: Fri, 17 Feb 2023 20:49:12 +0200 Subject: [PATCH 0024/2078] wifi: rtl8xxxu: Remove always true condition in rtl8xxxu_print_chipinfo Fix a new smatch warning: drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c:1580 rtl8xxxu_print_chipinfo() warn: always true condition '(priv->chip_cut <= 15) => (0-15 <= 15)' Reported-by: kernel test robot Link: https://lore.kernel.org/oe-kbuild-all/202302140753.71IgU77A-lkp@intel.com/ Fixes: 7b0ac469e331 ("wifi: rtl8xxxu: Recognise all possible chip cuts") Signed-off-by: Bitterblue Smith Reviewed-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/68eff98b-a022-5a00-f330-adf623a35772@gmail.com --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 620a5cc2bfdd1..54ca6f2ced3f3 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -1575,11 +1575,7 @@ rtl8xxxu_set_spec_sifs(struct rtl8xxxu_priv *priv, u16 cck, u16 ofdm) static void rtl8xxxu_print_chipinfo(struct rtl8xxxu_priv *priv) { struct device *dev = &priv->udev->dev; - char cut = '?'; - - /* Currently always true: chip_cut is 4 bits. 
*/ - if (priv->chip_cut <= 15) - cut = 'A' + priv->chip_cut; + char cut = 'A' + priv->chip_cut; dev_info(dev, "RTL%s rev %c (%s) romver %d, %iT%iR, TX queues %i, WiFi=%i, BT=%i, GPS=%i, HI PA=%i\n", -- GitLab From 96c79da2e4d1a25e73916c656fe4ccf7fc8176ae Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 18 Feb 2023 16:29:40 +0100 Subject: [PATCH 0025/2078] wifi: rtw88: mac: Add support for the SDIO HCI in rtw_pwr_seq_parser() rtw_pwr_seq_parser() needs to know about the HCI bus interface mask for the SDIO bus so it can parse the chip state change sequences. Signed-off-by: Martin Blumenstingl Reviewed-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230218152944.48842-2-martin.blumenstingl@googlemail.com --- drivers/net/wireless/realtek/rtw88/mac.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw88/mac.c b/drivers/net/wireless/realtek/rtw88/mac.c index dae64901bac5a..c08071a67b41c 100644 --- a/drivers/net/wireless/realtek/rtw88/mac.c +++ b/drivers/net/wireless/realtek/rtw88/mac.c @@ -222,6 +222,9 @@ static int rtw_pwr_seq_parser(struct rtw_dev *rtwdev, case RTW_HCI_TYPE_USB: intf_mask = RTW_PWR_INTF_USB_MSK; break; + case RTW_HCI_TYPE_SDIO: + intf_mask = RTW_PWR_INTF_SDIO_MSK; + break; default: return -EINVAL; } -- GitLab From 8599ea40582d49afe437badde76e31bd7429c607 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 18 Feb 2023 16:29:41 +0100 Subject: [PATCH 0026/2078] wifi: rtw88: mac: Add SDIO HCI support in the TX/page table setup txdma_queue_mapping() and priority_queue_cfg() can use the first entry of each chip's rqpn_table and page_table. Add this mapping so data transmission is possible on SDIO based chipsets. Signed-off-by: Martin Blumenstingl Reviewed-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230218152944.48842-3-martin.blumenstingl@googlemail.com --- drivers/net/wireless/realtek/rtw88/mac.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw88/mac.c b/drivers/net/wireless/realtek/rtw88/mac.c index c08071a67b41c..1c9530a0eb693 100644 --- a/drivers/net/wireless/realtek/rtw88/mac.c +++ b/drivers/net/wireless/realtek/rtw88/mac.c @@ -1043,6 +1043,9 @@ static int txdma_queue_mapping(struct rtw_dev *rtwdev) else return -EINVAL; break; + case RTW_HCI_TYPE_SDIO: + rqpn = &chip->rqpn_table[0]; + break; default: return -EINVAL; } @@ -1205,6 +1208,9 @@ static int priority_queue_cfg(struct rtw_dev *rtwdev) else return -EINVAL; break; + case RTW_HCI_TYPE_SDIO: + pg_tbl = &chip->page_table[0]; + break; default: return -EINVAL; } -- GitLab From 64e9d564653566812dfade36d20f1794407e6ca1 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 18 Feb 2023 16:29:42 +0100 Subject: [PATCH 0027/2078] wifi: rtw88: rtw8821c: Implement RTL8821CS (SDIO) efuse parsing The efuse of the SDIO RTL8821CS chip has only one known member: the mac address is at offset 0x11a. Add a struct rtw8821cs_efuse describing this and use it for copying the mac address when the SDIO bus is used. 
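The offsets in the comments are self-consistent: the e/u/s union of struct rtw8821c_efuse starts at 0xd0, and 0xd0 + 0x4a bytes of reserved space places mac_addr at 0x11a. Assuming that layout (it is only documented by the comments here), a compile-time check of the arithmetic could look like this illustrative line, which is not part of the patch:

	static_assert(offsetof(struct rtw8821c_efuse, s.mac_addr) == 0x11a);

The same reasoning gives 0x11a for the RTL8822BS map below and 0x16a for the RTL8822CS map, whose union starts at 0x120 instead (0x120 + 0x4a = 0x16a).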
Signed-off-by: Martin Blumenstingl Reviewed-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230218152944.48842-4-martin.blumenstingl@googlemail.com --- drivers/net/wireless/realtek/rtw88/rtw8821c.c | 9 +++++++++ drivers/net/wireless/realtek/rtw88/rtw8821c.h | 6 ++++++ 2 files changed, 15 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821c.c b/drivers/net/wireless/realtek/rtw88/rtw8821c.c index 17f800f6efbd0..7ae0541d7b995 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8821c.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8821c.c @@ -32,6 +32,12 @@ static void rtw8821cu_efuse_parsing(struct rtw_efuse *efuse, ether_addr_copy(efuse->addr, map->u.mac_addr); } +static void rtw8821cs_efuse_parsing(struct rtw_efuse *efuse, + struct rtw8821c_efuse *map) +{ + ether_addr_copy(efuse->addr, map->s.mac_addr); +} + enum rtw8821ce_rf_set { SWITCH_TO_BTG, SWITCH_TO_WLG, @@ -77,6 +83,9 @@ static int rtw8821c_read_efuse(struct rtw_dev *rtwdev, u8 *log_map) case RTW_HCI_TYPE_USB: rtw8821cu_efuse_parsing(efuse, map); break; + case RTW_HCI_TYPE_SDIO: + rtw8821cs_efuse_parsing(efuse, map); + break; default: /* unsupported now */ return -ENOTSUPP; diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821c.h b/drivers/net/wireless/realtek/rtw88/rtw8821c.h index 1c81260f3a542..fcff31688c453 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8821c.h +++ b/drivers/net/wireless/realtek/rtw88/rtw8821c.h @@ -65,6 +65,11 @@ struct rtw8821ce_efuse { u8 res7; }; +struct rtw8821cs_efuse { + u8 res4[0x4a]; /* 0xd0 */ + u8 mac_addr[ETH_ALEN]; /* 0x11a */ +} __packed; + struct rtw8821c_efuse { __le16 rtl_id; u8 res0[0x0e]; @@ -94,6 +99,7 @@ struct rtw8821c_efuse { union { struct rtw8821ce_efuse e; struct rtw8821cu_efuse u; + struct rtw8821cs_efuse s; }; }; -- GitLab From 9e688784b8a13515ee186d0ee1cfab12b6d05241 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 18 Feb 2023 16:29:43 +0100 Subject: [PATCH 0028/2078] wifi: rtw88: rtw8822b: Implement RTL8822BS (SDIO) efuse parsing The efuse of the SDIO RTL8822BS chip has only one known member: the mac address is at offset 0x11a. Add a struct rtw8822bs_efuse describing this and use it for copying the mac address when the SDIO bus is used. 
Signed-off-by: Martin Blumenstingl Reviewed-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230218152944.48842-5-martin.blumenstingl@googlemail.com --- drivers/net/wireless/realtek/rtw88/rtw8822b.c | 9 +++++++++ drivers/net/wireless/realtek/rtw88/rtw8822b.h | 8 +++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822b.c b/drivers/net/wireless/realtek/rtw88/rtw8822b.c index 74dfb89b2c948..531b67787e2eb 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8822b.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8822b.c @@ -32,6 +32,12 @@ static void rtw8822bu_efuse_parsing(struct rtw_efuse *efuse, ether_addr_copy(efuse->addr, map->u.mac_addr); } +static void rtw8822bs_efuse_parsing(struct rtw_efuse *efuse, + struct rtw8822b_efuse *map) +{ + ether_addr_copy(efuse->addr, map->s.mac_addr); +} + static int rtw8822b_read_efuse(struct rtw_dev *rtwdev, u8 *log_map) { struct rtw_efuse *efuse = &rtwdev->efuse; @@ -65,6 +71,9 @@ static int rtw8822b_read_efuse(struct rtw_dev *rtwdev, u8 *log_map) case RTW_HCI_TYPE_USB: rtw8822bu_efuse_parsing(efuse, map); break; + case RTW_HCI_TYPE_SDIO: + rtw8822bs_efuse_parsing(efuse, map); + break; default: /* unsupported now */ return -ENOTSUPP; diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822b.h b/drivers/net/wireless/realtek/rtw88/rtw8822b.h index 01d3644e0c946..2dc3a6660f06a 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8822b.h +++ b/drivers/net/wireless/realtek/rtw88/rtw8822b.h @@ -65,6 +65,11 @@ struct rtw8822be_efuse { u8 res7; }; +struct rtw8822bs_efuse { + u8 res4[0x4a]; /* 0xd0 */ + u8 mac_addr[ETH_ALEN]; /* 0x11a */ +} __packed; + struct rtw8822b_efuse { __le16 rtl_id; u8 res0[0x0e]; @@ -92,8 +97,9 @@ struct rtw8822b_efuse { u8 country_code[2]; u8 res[3]; union { - struct rtw8822bu_efuse u; struct rtw8822be_efuse e; + struct rtw8822bu_efuse u; + struct rtw8822bs_efuse s; }; }; -- GitLab From ad0a677bce20c7fa2e8af3fd7f23c0686018202b Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 18 Feb 2023 16:29:44 +0100 Subject: [PATCH 0029/2078] wifi: rtw88: rtw8822c: Implement RTL8822CS (SDIO) efuse parsing The efuse of the SDIO RTL8822CS chip has only one known member: the mac address is at offset 0x16a. Add a struct rtw8822cs_efuse describing this and use it for copying the mac address when the SDIO bus is used. 
Signed-off-by: Martin Blumenstingl Reviewed-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230218152944.48842-6-martin.blumenstingl@googlemail.com --- drivers/net/wireless/realtek/rtw88/rtw8822c.c | 9 +++++++++ drivers/net/wireless/realtek/rtw88/rtw8822c.h | 8 +++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.c b/drivers/net/wireless/realtek/rtw88/rtw8822c.c index 964e27887fe2d..5a2c004b12df1 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8822c.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.c @@ -35,6 +35,12 @@ static void rtw8822cu_efuse_parsing(struct rtw_efuse *efuse, ether_addr_copy(efuse->addr, map->u.mac_addr); } +static void rtw8822cs_efuse_parsing(struct rtw_efuse *efuse, + struct rtw8822c_efuse *map) +{ + ether_addr_copy(efuse->addr, map->s.mac_addr); +} + static int rtw8822c_read_efuse(struct rtw_dev *rtwdev, u8 *log_map) { struct rtw_efuse *efuse = &rtwdev->efuse; @@ -67,6 +73,9 @@ static int rtw8822c_read_efuse(struct rtw_dev *rtwdev, u8 *log_map) case RTW_HCI_TYPE_USB: rtw8822cu_efuse_parsing(efuse, map); break; + case RTW_HCI_TYPE_SDIO: + rtw8822cs_efuse_parsing(efuse, map); + break; default: /* unsupported now */ return -ENOTSUPP; diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.h b/drivers/net/wireless/realtek/rtw88/rtw8822c.h index 479d5d769c520..1bc0e7f5d6bb1 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8822c.h +++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.h @@ -16,6 +16,11 @@ struct rtw8822cu_efuse { u8 res2[0x3d]; }; +struct rtw8822cs_efuse { + u8 res0[0x4a]; /* 0x120 */ + u8 mac_addr[ETH_ALEN]; /* 0x16a */ +} __packed; + struct rtw8822ce_efuse { u8 mac_addr[ETH_ALEN]; /* 0x120 */ u8 vender_id[2]; @@ -91,8 +96,9 @@ struct rtw8822c_efuse { u8 res9; u8 res10[0x42]; union { - struct rtw8822cu_efuse u; struct rtw8822ce_efuse e; + struct rtw8822cu_efuse u; + struct rtw8822cs_efuse s; }; }; -- GitLab From aa4e055945462e645224795e174c25c82f6002ac Mon Sep 17 00:00:00 2001 From: Chin-Yen Lee Date: Mon, 20 Feb 2023 15:01:57 +0800 Subject: [PATCH 0030/2078] wifi: rtw89: add tx_wake notify for 8852B 8852B has the same issue: management frames get stuck when the wifi chip enters low power save mode, so we also add the notify wake function to wake the wifi chip before forwarding management frames.
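The firmware feature flag added below is consulted in the transmit path; conceptually the driver does something like the following before handing a management frame to a chip that may be dozing (sketch only, the wake helper name here is hypothetical):

	/* sketch: make sure the chip is awake before management TX */
	static void mgmt_tx_wake_if_needed(struct rtw89_dev *rtwdev,
					   struct ieee80211_hdr *hdr)
	{
		if (!RTW89_CHK_FW_FEATURE(TX_WAKE, &rtwdev->fw))
			return;

		if (ieee80211_is_mgmt(hdr->frame_control))
			rtw89_wake_chip_for_tx(rtwdev);	/* hypothetical helper */
	}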
Signed-off-by: Chin-Yen Lee Signed-off-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230220070202.29868-2-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/fw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/realtek/rtw89/fw.c b/drivers/net/wireless/realtek/rtw89/fw.c index a88f7974c1d6b..9dc0abc1fc37e 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.c +++ b/drivers/net/wireless/realtek/rtw89/fw.c @@ -258,6 +258,7 @@ static const struct __fw_feat_cfg fw_feat_tbl[] = { __CFG_FW_FEAT(RTL8852A, ge, 0, 13, 36, 0, CRASH_TRIGGER), __CFG_FW_FEAT(RTL8852A, ge, 0, 13, 38, 0, PACKET_DROP), __CFG_FW_FEAT(RTL8852B, ge, 0, 29, 26, 0, NO_LPS_PG), + __CFG_FW_FEAT(RTL8852B, ge, 0, 29, 26, 0, TX_WAKE), __CFG_FW_FEAT(RTL8852C, ge, 0, 27, 20, 0, PACKET_DROP), __CFG_FW_FEAT(RTL8852C, le, 0, 27, 33, 0, NO_DEEP_PS), __CFG_FW_FEAT(RTL8852C, ge, 0, 27, 34, 0, TX_WAKE), -- GitLab From 31c416e69dbf416583d0d8245d91e7e82b7846b1 Mon Sep 17 00:00:00 2001 From: Zong-Zhe Yang Date: Mon, 20 Feb 2023 15:01:58 +0800 Subject: [PATCH 0031/2078] wifi: rtw89: fw: configure CRASH_TRIGGER feature for 8852B RTL8852B firmware supports the CRASH_TRIGGER feature from v0.29.29.0 onwards. After this is configured, debugfs fw_crash can support type 1 on RTL8852B to trigger a firmware crash and verify L2 recovery through simulation. Signed-off-by: Zong-Zhe Yang Signed-off-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230220070202.29868-3-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/fw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/realtek/rtw89/fw.c b/drivers/net/wireless/realtek/rtw89/fw.c index 9dc0abc1fc37e..d5b1f1b0fd672 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.c +++ b/drivers/net/wireless/realtek/rtw89/fw.c @@ -259,6 +259,7 @@ static const struct __fw_feat_cfg fw_feat_tbl[] = { __CFG_FW_FEAT(RTL8852A, ge, 0, 13, 38, 0, PACKET_DROP), __CFG_FW_FEAT(RTL8852B, ge, 0, 29, 26, 0, NO_LPS_PG), __CFG_FW_FEAT(RTL8852B, ge, 0, 29, 26, 0, TX_WAKE), + __CFG_FW_FEAT(RTL8852B, ge, 0, 29, 29, 0, CRASH_TRIGGER), __CFG_FW_FEAT(RTL8852C, ge, 0, 27, 20, 0, PACKET_DROP), __CFG_FW_FEAT(RTL8852C, le, 0, 27, 33, 0, NO_DEEP_PS), __CFG_FW_FEAT(RTL8852C, ge, 0, 27, 34, 0, TX_WAKE), -- GitLab From bb9040b3ff970529ceaa20b064c113d5ffd5520f Mon Sep 17 00:00:00 2001 From: Po-Hao Huang Date: Mon, 20 Feb 2023 15:01:59 +0800 Subject: [PATCH 0032/2078] wifi: rtw89: adjust channel encoding to common function Since the range of the channel table is identical among ICs, make the channel encode/decode functions common instead of IC dependent, so that all ICs whose firmware needs this kind of coding can use them directly. This patch doesn't change any logic at all.
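As a worked example of the shared encoding that the patch below moves into phy.c: 5 GHz channel 157 matches base-table index 5 (base channel 149), so chan_idx = FIELD_PREP(GENMASK(7, 4), 5) | FIELD_PREP(GENMASK(3, 0), (157 - 149) >> 1) = 0x54, and decoding reverses this to 149 + (4 << 1) = 157 on NL80211_BAND_5GHZ. 2 GHz channels (1-14) are stored directly in the low nibble under base index 0.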
Signed-off-by: Po-Hao Huang Signed-off-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230220070202.29868-4-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/phy.c | 72 ++++++++++++++++++ drivers/net/wireless/realtek/rtw89/phy.h | 3 + drivers/net/wireless/realtek/rtw89/rtw8852c.c | 74 +------------------ 3 files changed, 77 insertions(+), 72 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/phy.c b/drivers/net/wireless/realtek/rtw89/phy.c index d9f61ba3d1765..d8b035972dd48 100644 --- a/drivers/net/wireless/realtek/rtw89/phy.c +++ b/drivers/net/wireless/realtek/rtw89/phy.c @@ -4294,3 +4294,75 @@ void rtw89_phy_tssi_ctrl_set_bandedge_cfg(struct rtw89_dev *rtwdev, data[RTW89_TSSI_SBW20]); } EXPORT_SYMBOL(rtw89_phy_tssi_ctrl_set_bandedge_cfg); + +static +const u8 rtw89_ch_base_table[16] = {1, 0xff, + 36, 100, 132, 149, 0xff, + 1, 33, 65, 97, 129, 161, 193, 225, 0xff}; +#define RTW89_CH_BASE_IDX_2G 0 +#define RTW89_CH_BASE_IDX_5G_FIRST 2 +#define RTW89_CH_BASE_IDX_5G_LAST 5 +#define RTW89_CH_BASE_IDX_6G_FIRST 7 +#define RTW89_CH_BASE_IDX_6G_LAST 14 + +#define RTW89_CH_BASE_IDX_MASK GENMASK(7, 4) +#define RTW89_CH_OFFSET_MASK GENMASK(3, 0) + +u8 rtw89_encode_chan_idx(struct rtw89_dev *rtwdev, u8 central_ch, u8 band) +{ + u8 chan_idx; + u8 last, first; + u8 idx; + + switch (band) { + case RTW89_BAND_2G: + chan_idx = FIELD_PREP(RTW89_CH_BASE_IDX_MASK, RTW89_CH_BASE_IDX_2G) | + FIELD_PREP(RTW89_CH_OFFSET_MASK, central_ch); + return chan_idx; + case RTW89_BAND_5G: + first = RTW89_CH_BASE_IDX_5G_FIRST; + last = RTW89_CH_BASE_IDX_5G_LAST; + break; + case RTW89_BAND_6G: + first = RTW89_CH_BASE_IDX_6G_FIRST; + last = RTW89_CH_BASE_IDX_6G_LAST; + break; + default: + rtw89_warn(rtwdev, "Unsupported band %d\n", band); + return 0; + } + + for (idx = last; idx >= first; idx--) + if (central_ch >= rtw89_ch_base_table[idx]) + break; + + if (idx < first) { + rtw89_warn(rtwdev, "Unknown band %d channel %d\n", band, central_ch); + return 0; + } + + chan_idx = FIELD_PREP(RTW89_CH_BASE_IDX_MASK, idx) | + FIELD_PREP(RTW89_CH_OFFSET_MASK, + (central_ch - rtw89_ch_base_table[idx]) >> 1); + return chan_idx; +} +EXPORT_SYMBOL(rtw89_encode_chan_idx); + +void rtw89_decode_chan_idx(struct rtw89_dev *rtwdev, u8 chan_idx, + u8 *ch, enum nl80211_band *band) +{ + u8 idx, offset; + + idx = FIELD_GET(RTW89_CH_BASE_IDX_MASK, chan_idx); + offset = FIELD_GET(RTW89_CH_OFFSET_MASK, chan_idx); + + if (idx == RTW89_CH_BASE_IDX_2G) { + *band = NL80211_BAND_2GHZ; + *ch = offset; + return; + } + + *band = idx <= RTW89_CH_BASE_IDX_5G_LAST ? 
NL80211_BAND_5GHZ : NL80211_BAND_6GHZ; + *ch = rtw89_ch_base_table[idx] + (offset << 1); +} +EXPORT_SYMBOL(rtw89_decode_chan_idx); diff --git a/drivers/net/wireless/realtek/rtw89/phy.h b/drivers/net/wireless/realtek/rtw89/phy.h index 21233f094644b..de0a9abf646e3 100644 --- a/drivers/net/wireless/realtek/rtw89/phy.h +++ b/drivers/net/wireless/realtek/rtw89/phy.h @@ -555,5 +555,8 @@ void rtw89_phy_tssi_ctrl_set_bandedge_cfg(struct rtw89_dev *rtwdev, enum rtw89_tssi_bandedge_cfg bandedge_cfg); void rtw89_phy_ul_tb_assoc(struct rtw89_dev *rtwdev, struct rtw89_vif *rtwvif); void rtw89_phy_ul_tb_ctrl_track(struct rtw89_dev *rtwdev); +u8 rtw89_encode_chan_idx(struct rtw89_dev *rtwdev, u8 central_ch, u8 band); +void rtw89_decode_chan_idx(struct rtw89_dev *rtwdev, u8 chan_idx, + u8 *ch, enum nl80211_band *band); #endif diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852c.c b/drivers/net/wireless/realtek/rtw89/rtw8852c.c index d2dde21d3daf5..8af813132f71d 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852c.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852c.c @@ -852,76 +852,6 @@ static void rtw8852c_set_gain_error(struct rtw89_dev *rtwdev, } } -static -const u8 rtw8852c_ch_base_table[16] = {1, 0xff, - 36, 100, 132, 149, 0xff, - 1, 33, 65, 97, 129, 161, 193, 225, 0xff}; -#define RTW8852C_CH_BASE_IDX_2G 0 -#define RTW8852C_CH_BASE_IDX_5G_FIRST 2 -#define RTW8852C_CH_BASE_IDX_5G_LAST 5 -#define RTW8852C_CH_BASE_IDX_6G_FIRST 7 -#define RTW8852C_CH_BASE_IDX_6G_LAST 14 - -#define RTW8852C_CH_BASE_IDX_MASK GENMASK(7, 4) -#define RTW8852C_CH_OFFSET_MASK GENMASK(3, 0) - -static u8 rtw8852c_encode_chan_idx(struct rtw89_dev *rtwdev, u8 central_ch, u8 band) -{ - u8 chan_idx; - u8 last, first; - u8 idx; - - switch (band) { - case RTW89_BAND_2G: - chan_idx = FIELD_PREP(RTW8852C_CH_BASE_IDX_MASK, RTW8852C_CH_BASE_IDX_2G) | - FIELD_PREP(RTW8852C_CH_OFFSET_MASK, central_ch); - return chan_idx; - case RTW89_BAND_5G: - first = RTW8852C_CH_BASE_IDX_5G_FIRST; - last = RTW8852C_CH_BASE_IDX_5G_LAST; - break; - case RTW89_BAND_6G: - first = RTW8852C_CH_BASE_IDX_6G_FIRST; - last = RTW8852C_CH_BASE_IDX_6G_LAST; - break; - default: - rtw89_warn(rtwdev, "Unsupported band %d\n", band); - return 0; - } - - for (idx = last; idx >= first; idx--) - if (central_ch >= rtw8852c_ch_base_table[idx]) - break; - - if (idx < first) { - rtw89_warn(rtwdev, "Unknown band %d channel %d\n", band, central_ch); - return 0; - } - - chan_idx = FIELD_PREP(RTW8852C_CH_BASE_IDX_MASK, idx) | - FIELD_PREP(RTW8852C_CH_OFFSET_MASK, - (central_ch - rtw8852c_ch_base_table[idx]) >> 1); - return chan_idx; -} - -static void rtw8852c_decode_chan_idx(struct rtw89_dev *rtwdev, u8 chan_idx, - u8 *ch, enum nl80211_band *band) -{ - u8 idx, offset; - - idx = FIELD_GET(RTW8852C_CH_BASE_IDX_MASK, chan_idx); - offset = FIELD_GET(RTW8852C_CH_OFFSET_MASK, chan_idx); - - if (idx == RTW8852C_CH_BASE_IDX_2G) { - *band = NL80211_BAND_2GHZ; - *ch = offset; - return; - } - - *band = idx <= RTW8852C_CH_BASE_IDX_5G_LAST ? 
NL80211_BAND_5GHZ : NL80211_BAND_6GHZ; - *ch = rtw8852c_ch_base_table[idx] + (offset << 1); -} - static void rtw8852c_set_gain_offset(struct rtw89_dev *rtwdev, const struct rtw89_chan *chan, enum rtw89_phy_idx phy_idx, @@ -1084,7 +1014,7 @@ static void rtw8852c_ctrl_ch(struct rtw89_dev *rtwdev, } } - chan_idx = rtw8852c_encode_chan_idx(rtwdev, chan->primary_channel, band); + chan_idx = rtw89_encode_chan_idx(rtwdev, chan->primary_channel, band); rtw89_phy_write32_idx(rtwdev, R_MAC_PIN_SEL, B_CH_IDX_SEG0, chan_idx, phy_idx); } @@ -2730,7 +2660,7 @@ static void rtw8852c_fill_freq_with_ppdu(struct rtw89_dev *rtwdev, if (chan_idx == 0) return; - rtw8852c_decode_chan_idx(rtwdev, chan_idx, &ch, &band); + rtw89_decode_chan_idx(rtwdev, chan_idx, &ch, &band); status->freq = ieee80211_channel_to_frequency(ch, band); status->band = band; } -- GitLab From 4f24d7aa575a3c01192faa7f4fb6a91c54f4ef47 Mon Sep 17 00:00:00 2001 From: Po-Hao Huang Date: Mon, 20 Feb 2023 15:02:00 +0800 Subject: [PATCH 0033/2078] wifi: rtw89: 8852b: add channel encoding for hw_scan To obtain correct packet frequency for hw_scan, 52b needs to decode the channel index obtained from hardware. Change the driver related set channel part as well to make driver/firmware compatible. Signed-off-by: Po-Hao Huang Signed-off-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230220070202.29868-5-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/rtw8852b.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852b.c b/drivers/net/wireless/realtek/rtw89/rtw8852b.c index ee8dba7e0074a..499ae0389c715 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852b.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852b.c @@ -1422,6 +1422,7 @@ static void rtw8852b_set_channel_bb(struct rtw89_dev *rtwdev, const struct rtw89 { bool cck_en = chan->channel <= 14; u8 pri_ch_idx = chan->pri_ch_idx; + u8 band = chan->band_type, chan_idx; if (cck_en) rtw8852b_ctrl_sco_cck(rtwdev, chan->primary_channel); @@ -1444,8 +1445,8 @@ static void rtw8852b_set_channel_bb(struct rtw89_dev *rtwdev, const struct rtw89 B_BT_DYN_DC_EST_EN_MSK, 0x0); rtw89_phy_write32_mask(rtwdev, R_GNT_BT_WGT_EN, B_GNT_BT_WGT_EN, 0x0); } - rtw89_phy_write32_mask(rtwdev, R_MAC_PIN_SEL, B_CH_IDX_SEG0, - chan->primary_channel); + chan_idx = rtw89_encode_chan_idx(rtwdev, chan->primary_channel, band); + rtw89_phy_write32_mask(rtwdev, R_MAC_PIN_SEL, B_CH_IDX_SEG0, chan_idx); rtw8852b_5m_mask(rtwdev, chan, phy_idx); rtw8852b_bb_set_pop(rtwdev); rtw8852b_bb_reset_all(rtwdev, phy_idx); @@ -2299,13 +2300,14 @@ static void rtw8852b_fill_freq_with_ppdu(struct rtw89_dev *rtwdev, struct ieee80211_rx_status *status) { u16 chan = phy_ppdu->chan_idx; - u8 band; + enum nl80211_band band; + u8 ch; if (chan == 0) return; - band = chan <= 14 ? NL80211_BAND_2GHZ : NL80211_BAND_5GHZ; - status->freq = ieee80211_channel_to_frequency(chan, band); + rtw89_decode_chan_idx(rtwdev, chan, &ch, &band); + status->freq = ieee80211_channel_to_frequency(ch, band); status->band = band; } -- GitLab From 357277e1afdae0f56d08846e818579f5a020709d Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Mon, 20 Feb 2023 15:02:01 +0800 Subject: [PATCH 0034/2078] wifi: rtw89: 8852b: enable hw_scan support This enables hw_scan for 8852b after firmware version 0.29.29.0. 
Signed-off-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230220070202.29868-6-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/fw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/realtek/rtw89/fw.c b/drivers/net/wireless/realtek/rtw89/fw.c index d5b1f1b0fd672..dceb033acbc89 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.c +++ b/drivers/net/wireless/realtek/rtw89/fw.c @@ -260,6 +260,7 @@ static const struct __fw_feat_cfg fw_feat_tbl[] = { __CFG_FW_FEAT(RTL8852B, ge, 0, 29, 26, 0, NO_LPS_PG), __CFG_FW_FEAT(RTL8852B, ge, 0, 29, 26, 0, TX_WAKE), __CFG_FW_FEAT(RTL8852B, ge, 0, 29, 29, 0, CRASH_TRIGGER), + __CFG_FW_FEAT(RTL8852B, ge, 0, 29, 29, 0, SCAN_OFFLOAD), __CFG_FW_FEAT(RTL8852C, ge, 0, 27, 20, 0, PACKET_DROP), __CFG_FW_FEAT(RTL8852C, le, 0, 27, 33, 0, NO_DEEP_PS), __CFG_FW_FEAT(RTL8852C, ge, 0, 27, 34, 0, TX_WAKE), -- GitLab From 0d1f7ff19d4f8a6da5a6b60d2afd1d34b5d5ebfc Mon Sep 17 00:00:00 2001 From: Zong-Zhe Yang Date: Mon, 20 Feb 2023 15:02:02 +0800 Subject: [PATCH 0035/2078] wifi: rtw89: refine FW feature judgement on packet drop The newer chips use the newer firmware branches, e.g. v027, v029, and those firmware branches have supported packet drop since they were branched out. The initial firmware branch used by each chip is as below. * 8852A: v009 * 8852C: v027 * 8852B: v027 So, only 8852A may use firmware which doesn't support packet drop at runtime. To avoid having to list every supporting chip in the firmware feature table, reverse the judgement and mark only the firmware that does not support packet drop. Besides, rtw89_mac_ptk_drop_by_band_and_wait() did not check the firmware feature before calling rtw89_fw_h2c_pkt_drop(). We also fix that. Signed-off-by: Zong-Zhe Yang Signed-off-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230220070202.29868-7-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/core.h | 2 +- drivers/net/wireless/realtek/rtw89/fw.c | 4 ++-- drivers/net/wireless/realtek/rtw89/mac.c | 2 +- drivers/net/wireless/realtek/rtw89/mac80211.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index 41365ffb7e5ea..b1a886898c5a0 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -3023,7 +3023,7 @@ enum rtw89_fw_feature { RTW89_FW_FEATURE_SCAN_OFFLOAD, RTW89_FW_FEATURE_TX_WAKE, RTW89_FW_FEATURE_CRASH_TRIGGER, - RTW89_FW_FEATURE_PACKET_DROP, + RTW89_FW_FEATURE_NO_PACKET_DROP, RTW89_FW_FEATURE_NO_DEEP_PS, RTW89_FW_FEATURE_NO_LPS_PG, }; diff --git a/drivers/net/wireless/realtek/rtw89/fw.c b/drivers/net/wireless/realtek/rtw89/fw.c index dceb033acbc89..1a4ff24078fb9 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.c +++ b/drivers/net/wireless/realtek/rtw89/fw.c @@ -235,6 +235,7 @@ static bool __fw_feat_cond_ ## __cond(u32 suit_ver_code, u32 comp_ver_code) \ __DEF_FW_FEAT_COND(ge, >=); /* greater or equal */ __DEF_FW_FEAT_COND(le, <=); /* less or equal */ +__DEF_FW_FEAT_COND(lt, <); /* less than */ struct __fw_feat_cfg { enum rtw89_core_chip_id chip_id; @@ -256,12 +257,11 @@ static const struct __fw_feat_cfg fw_feat_tbl[] = { __CFG_FW_FEAT(RTL8852A, ge, 0, 13, 35, 0, SCAN_OFFLOAD), __CFG_FW_FEAT(RTL8852A, ge, 0, 13, 35, 0, TX_WAKE), __CFG_FW_FEAT(RTL8852A, ge, 0, 13, 36, 0, CRASH_TRIGGER), - __CFG_FW_FEAT(RTL8852A, ge, 0, 13, 38, 0, PACKET_DROP), + __CFG_FW_FEAT(RTL8852A, lt, 0, 13, 38, 0, NO_PACKET_DROP), __CFG_FW_FEAT(RTL8852B, ge, 0, 29, 26, 0, NO_LPS_PG),
__CFG_FW_FEAT(RTL8852B, ge, 0, 29, 26, 0, TX_WAKE), __CFG_FW_FEAT(RTL8852B, ge, 0, 29, 29, 0, CRASH_TRIGGER), __CFG_FW_FEAT(RTL8852B, ge, 0, 29, 29, 0, SCAN_OFFLOAD), - __CFG_FW_FEAT(RTL8852C, ge, 0, 27, 20, 0, PACKET_DROP), __CFG_FW_FEAT(RTL8852C, le, 0, 27, 33, 0, NO_DEEP_PS), __CFG_FW_FEAT(RTL8852C, ge, 0, 27, 34, 0, TX_WAKE), __CFG_FW_FEAT(RTL8852C, ge, 0, 27, 36, 0, SCAN_OFFLOAD), diff --git a/drivers/net/wireless/realtek/rtw89/mac.c b/drivers/net/wireless/realtek/rtw89/mac.c index 2e2a2b6eab09d..3d1e4ffef1b16 100644 --- a/drivers/net/wireless/realtek/rtw89/mac.c +++ b/drivers/net/wireless/realtek/rtw89/mac.c @@ -5426,7 +5426,7 @@ int rtw89_mac_ptk_drop_by_band_and_wait(struct rtw89_dev *rtwdev, for (i = 0; i < try_cnt; i++) { ret = read_poll_timeout(mac_is_txq_empty, empty, empty, 50, 50000, false, rtwdev); - if (ret) + if (ret && !RTW89_CHK_FW_FEATURE(NO_PACKET_DROP, &rtwdev->fw)) rtw89_fw_h2c_pkt_drop(rtwdev, ¶ms); else return 0; diff --git a/drivers/net/wireless/realtek/rtw89/mac80211.c b/drivers/net/wireless/realtek/rtw89/mac80211.c index d43281f7335b1..367a7bf319dae 100644 --- a/drivers/net/wireless/realtek/rtw89/mac80211.c +++ b/drivers/net/wireless/realtek/rtw89/mac80211.c @@ -676,7 +676,7 @@ static void rtw89_ops_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif, rtw89_leave_lps(rtwdev); rtw89_hci_flush_queues(rtwdev, queues, drop); - if (drop && RTW89_CHK_FW_FEATURE(PACKET_DROP, &rtwdev->fw)) + if (drop && !RTW89_CHK_FW_FEATURE(NO_PACKET_DROP, &rtwdev->fw)) __rtw89_drop_packets(rtwdev, vif); else rtw89_mac_flush_txq(rtwdev, queues, drop); -- GitLab From 0a09a2f933c73dc76ab0b72da6855f44342a8903 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 21 Feb 2023 21:06:42 +0100 Subject: [PATCH 0036/2078] bpf: Annotate data races in bpf_local_storage There are a few cases where hlist_node is checked to be unhashed without holding the lock protecting its modification. In this case, one must use hlist_unhashed_lockless to avoid load tearing and KCSAN reports. Fix this by using lockless variant in places not protected by the lock. Since this is not prompted by any actual KCSAN reports but only from code review, I have not included a fixes tag. 
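For context, the two helpers differ only in whether the pprev load is annotated for concurrent writers; paraphrasing the definitions in include/linux/list.h:

	static inline int hlist_unhashed(const struct hlist_node *h)
	{
		return !h->pprev;		/* plain load, relies on the lock */
	}

	static inline int hlist_unhashed_lockless(const struct hlist_node *h)
	{
		return !READ_ONCE(h->pprev);	/* safe to call without the lock */
	}

Using the _lockless variant at the unlocked call sites documents the data race and avoids load tearing without changing the logic.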
Cc: Martin KaFai Lau Cc: KP Singh Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20230221200646.2500777-4-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_local_storage.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 35f4138a54dc1..58da17ae51241 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -51,11 +51,21 @@ owner_storage(struct bpf_local_storage_map *smap, void *owner) return map->ops->map_owner_storage_ptr(owner); } +static bool selem_linked_to_storage_lockless(const struct bpf_local_storage_elem *selem) +{ + return !hlist_unhashed_lockless(&selem->snode); +} + static bool selem_linked_to_storage(const struct bpf_local_storage_elem *selem) { return !hlist_unhashed(&selem->snode); } +static bool selem_linked_to_map_lockless(const struct bpf_local_storage_elem *selem) +{ + return !hlist_unhashed_lockless(&selem->map_node); +} + static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem) { return !hlist_unhashed(&selem->map_node); @@ -174,7 +184,7 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem, bool free_local_storage = false; unsigned long flags; - if (unlikely(!selem_linked_to_storage(selem))) + if (unlikely(!selem_linked_to_storage_lockless(selem))) /* selem has already been unlinked from sk */ return; @@ -208,7 +218,7 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem) struct bpf_local_storage_map_bucket *b; unsigned long flags; - if (unlikely(!selem_linked_to_map(selem))) + if (unlikely(!selem_linked_to_map_lockless(selem))) /* selem has already be unlinked from smap */ return; @@ -420,7 +430,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, err = check_flags(old_sdata, map_flags); if (err) return ERR_PTR(err); - if (old_sdata && selem_linked_to_storage(SELEM(old_sdata))) { + if (old_sdata && selem_linked_to_storage_lockless(SELEM(old_sdata))) { copy_map_value_locked(&smap->map, old_sdata->data, value, false); return old_sdata; -- GitLab From 521d3c0a1730c29c96870919a7a115577e17f8c7 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 21 Feb 2023 21:06:43 +0100 Subject: [PATCH 0037/2078] bpf: Remove unused MEM_ALLOC | PTR_TRUSTED checks The plan is to supposedly tag everything with PTR_TRUSTED eventually, however those changes should bring in their respective code, instead of leaving it around right now. It is arguable whether PTR_TRUSTED is required for all types, when it's only use case is making PTR_TO_BTF_ID a bit stronger, while all other types are trusted by default. Hence, just drop the two instances which do not occur in the verifier for now to avoid reader confusion. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20230221200646.2500777-5-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d517d13878cfe..477c22c9bbd7a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6656,7 +6656,6 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env, case PTR_TO_BTF_ID | MEM_ALLOC: case PTR_TO_BTF_ID | PTR_TRUSTED: case PTR_TO_BTF_ID | MEM_RCU: - case PTR_TO_BTF_ID | MEM_ALLOC | PTR_TRUSTED: case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF: /* When referenced PTR_TO_BTF_ID is passed to release function, * its fixed offset must be 0. 
In the other cases, fixed offset @@ -9211,7 +9210,6 @@ static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_ ptr = reg->map_ptr; break; case PTR_TO_BTF_ID | MEM_ALLOC: - case PTR_TO_BTF_ID | MEM_ALLOC | PTR_TRUSTED: ptr = reg->btf; break; default: -- GitLab From da03e43a8c500fcfb11ac5eeb03c1b4a9c1dd958 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 21 Feb 2023 21:06:44 +0100 Subject: [PATCH 0038/2078] bpf: Fix check_reg_type for PTR_TO_BTF_ID The current code does type matching for the case where reg->type is PTR_TO_BTF_ID or has the PTR_TRUSTED flag. However, this only needs to occur for non-MEM_ALLOC and non-MEM_PERCPU cases, but will include both as per the current code. The MEM_ALLOC case with or without PTR_TRUSTED needs to be handled specially by the code for type_is_alloc case, while MEM_PERCPU case must be ignored. Hence, to restore correct behavior and for clarity, explicitly list out the handled PTR_TO_BTF_ID types which should be handled for each case using a switch statement. Helpers currently only take: PTR_TO_BTF_ID PTR_TO_BTF_ID | PTR_TRUSTED PTR_TO_BTF_ID | MEM_RCU PTR_TO_BTF_ID | MEM_ALLOC PTR_TO_BTF_ID | MEM_PERCPU PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED This fix was also described (for the MEM_ALLOC case) in [0]. [0]: https://lore.kernel.org/bpf/20221121160657.h6z7xuvedybp5y7s@apollo Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20230221200646.2500777-6-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 477c22c9bbd7a..062fd4a052345 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6527,7 +6527,14 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno, return -EACCES; found: - if (reg->type == PTR_TO_BTF_ID || reg->type & PTR_TRUSTED) { + if (base_type(reg->type) != PTR_TO_BTF_ID) + return 0; + + switch ((int)reg->type) { + case PTR_TO_BTF_ID: + case PTR_TO_BTF_ID | PTR_TRUSTED: + case PTR_TO_BTF_ID | MEM_RCU: + { /* For bpf_sk_release, it needs to match against first member * 'struct sock_common', hence make an exception for it. This * allows bpf_sk_release to work for multiple socket types. @@ -6563,13 +6570,23 @@ found: return -EACCES; } } - } else if (type_is_alloc(reg->type)) { + break; + } + case PTR_TO_BTF_ID | MEM_ALLOC: if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock) { verbose(env, "verifier internal error: unimplemented handling of MEM_ALLOC\n"); return -EFAULT; } + /* Handled by helper specific checks */ + break; + case PTR_TO_BTF_ID | MEM_PERCPU: + case PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED: + /* Handled by helper specific checks */ + break; + default: + verbose(env, "verifier internal error: invalid PTR_TO_BTF_ID register for type match\n"); + return -EFAULT; } - return 0; } -- GitLab From dbd8d22863e83ee2834642e4cfd3bdacb8a1c975 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 21 Feb 2023 21:06:45 +0100 Subject: [PATCH 0039/2078] bpf: Wrap register invalidation with a helper Typically, verifier should use env->allow_ptr_leaks when invaliding registers for users that don't have CAP_PERFMON or CAP_SYS_ADMIN to avoid leaking the pointer value. This is similar in spirit to c67cae551f0d ("bpf: Tighten ptr_to_btf_id checks."). In a lot of the existing checks, we know the capabilities are present, hence we don't do the check. 
Instead of being inconsistent in the application of the check, wrap the action of invalidating a register into a helper named 'mark_invalid_reg' and use it in a uniform fashion to replace open coded invalidation operations, so that the check is always made regardless of the call site and we don't have to remember whether it needs to be done or not for each case. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20230221200646.2500777-7-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 062fd4a052345..741cb5107536d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -895,6 +895,14 @@ static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_re static void __mark_reg_unknown(const struct bpf_verifier_env *env, struct bpf_reg_state *reg); +static void mark_reg_invalid(const struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + if (!env->allow_ptr_leaks) + __mark_reg_not_init(env, reg); + else + __mark_reg_unknown(env, reg); +} + static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env, struct bpf_func_state *state, int spi) { @@ -934,12 +942,8 @@ static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env, /* Dynptr slices are only PTR_TO_MEM_OR_NULL and PTR_TO_MEM */ if (dreg->type != (PTR_TO_MEM | PTR_MAYBE_NULL) && dreg->type != PTR_TO_MEM) continue; - if (dreg->dynptr_id == dynptr_id) { - if (!env->allow_ptr_leaks) - __mark_reg_not_init(env, dreg); - else - __mark_reg_unknown(env, dreg); - } + if (dreg->dynptr_id == dynptr_id) + mark_reg_invalid(env, dreg); })); /* Do not release reference state, we are destroying dynptr on stack, @@ -7384,7 +7388,7 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env) bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ if (reg_is_pkt_pointer_any(reg)) - __mark_reg_unknown(env, reg); + mark_reg_invalid(env, reg); })); } @@ -7429,12 +7433,8 @@ static int release_reference(struct bpf_verifier_env *env, return err; bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ - if (reg->ref_obj_id == ref_obj_id) { - if (!env->allow_ptr_leaks) - __mark_reg_not_init(env, reg); - else - __mark_reg_unknown(env, reg); - } + if (reg->ref_obj_id == ref_obj_id) + mark_reg_invalid(env, reg); })); return 0; @@ -7447,7 +7447,7 @@ static void invalidate_non_owning_refs(struct bpf_verifier_env *env) bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({ if (type_is_non_owning_ref(reg->type)) - __mark_reg_unknown(env, reg); + mark_reg_invalid(env, reg); })); } -- GitLab From 5d5de3a431d87ac51d43da8d796891d014975ab7 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Thu, 16 Feb 2023 10:48:21 +0800 Subject: [PATCH 0040/2078] bpf: Only allocate one bpf_mem_cache for bpf_cpumask_ma The size of bpf_cpumask is fixed, so there is no need to allocate many bpf_mem_caches for bpf_cpumask_ma; just one bpf_mem_cache is enough. Also add comments for bpf_mem_alloc_init() in bpf_mem_alloc.h to prevent future misuse.
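The new comment boils down to which pair of calls a bpf_mem_alloc user may mix; a minimal usage sketch (return-value checks omitted for brevity):

	#include <linux/bpf_mem_alloc.h>

	static void bpf_mem_alloc_usage_sketch(void)
	{
		struct bpf_mem_alloc fixed, anysize;
		void *obj, *p;

		/* size != 0: one cache of fixed-size objects */
		bpf_mem_alloc_init(&fixed, 64, false);
		obj = bpf_mem_cache_alloc(&fixed);
		bpf_mem_cache_free(&fixed, obj);
		bpf_mem_alloc_destroy(&fixed);

		/* size == 0: many caches, size chosen per allocation */
		bpf_mem_alloc_init(&anysize, 0, false);
		p = bpf_mem_alloc(&anysize, 64);
		bpf_mem_free(&anysize, p);
		bpf_mem_alloc_destroy(&anysize);
	}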
Signed-off-by: Hou Tao Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20230216024821.2202916-1-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_mem_alloc.h | 7 +++++++ kernel/bpf/cpumask.c | 6 +++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/include/linux/bpf_mem_alloc.h b/include/linux/bpf_mem_alloc.h index 3e164b8efaa92..a7104af61ab4d 100644 --- a/include/linux/bpf_mem_alloc.h +++ b/include/linux/bpf_mem_alloc.h @@ -14,6 +14,13 @@ struct bpf_mem_alloc { struct work_struct work; }; +/* 'size != 0' is for bpf_mem_alloc which manages fixed-size objects. + * Alloc and free are done with bpf_mem_cache_{alloc,free}(). + * + * 'size = 0' is for bpf_mem_alloc which manages many fixed-size objects. + * Alloc and free are done with bpf_mem_{alloc,free}() and the size of + * the returned object is given by the size argument of bpf_mem_alloc(). + */ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu); void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma); diff --git a/kernel/bpf/cpumask.c b/kernel/bpf/cpumask.c index 52b981512a351..2b3fbbfebdc5f 100644 --- a/kernel/bpf/cpumask.c +++ b/kernel/bpf/cpumask.c @@ -55,7 +55,7 @@ __bpf_kfunc struct bpf_cpumask *bpf_cpumask_create(void) /* cpumask must be the first element so struct bpf_cpumask be cast to struct cpumask. */ BUILD_BUG_ON(offsetof(struct bpf_cpumask, cpumask) != 0); - cpumask = bpf_mem_alloc(&bpf_cpumask_ma, sizeof(*cpumask)); + cpumask = bpf_mem_cache_alloc(&bpf_cpumask_ma); if (!cpumask) return NULL; @@ -123,7 +123,7 @@ __bpf_kfunc void bpf_cpumask_release(struct bpf_cpumask *cpumask) if (refcount_dec_and_test(&cpumask->usage)) { migrate_disable(); - bpf_mem_free(&bpf_cpumask_ma, cpumask); + bpf_mem_cache_free(&bpf_cpumask_ma, cpumask); migrate_enable(); } } @@ -468,7 +468,7 @@ static int __init cpumask_kfunc_init(void) }, }; - ret = bpf_mem_alloc_init(&bpf_cpumask_ma, 0, false); + ret = bpf_mem_alloc_init(&bpf_cpumask_ma, sizeof(struct bpf_cpumask), false); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &cpumask_kfunc_set); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &cpumask_kfunc_set); return ret ?: register_btf_id_dtor_kfuncs(cpumask_dtors, -- GitLab From 1f265d2aea0dff1f2f9ecd41f545d601869462c5 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 16 Feb 2023 15:51:01 +0800 Subject: [PATCH 0041/2078] selftests/bpf: Remove not used headers The following three uapi headers: tools/arch/arm64/include/uapi/asm/bpf_perf_event.h tools/arch/s390/include/uapi/asm/bpf_perf_event.h tools/arch/s390/include/uapi/asm/ptrace.h were introduced in commit 618e165b2a8e ("selftests/bpf: sync kernel headers and introduce arch support in Makefile"), they are not used any more after commit 720f228e8d31 ("bpf: fix broken BPF selftest build"), so remove them. 
Signed-off-by: Tiezhu Yang Link: https://lore.kernel.org/r/1676533861-27508-1-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Alexei Starovoitov --- .../arm64/include/uapi/asm/bpf_perf_event.h | 9 - .../s390/include/uapi/asm/bpf_perf_event.h | 9 - tools/arch/s390/include/uapi/asm/ptrace.h | 458 ------------------ 3 files changed, 476 deletions(-) delete mode 100644 tools/arch/arm64/include/uapi/asm/bpf_perf_event.h delete mode 100644 tools/arch/s390/include/uapi/asm/bpf_perf_event.h delete mode 100644 tools/arch/s390/include/uapi/asm/ptrace.h diff --git a/tools/arch/arm64/include/uapi/asm/bpf_perf_event.h b/tools/arch/arm64/include/uapi/asm/bpf_perf_event.h deleted file mode 100644 index b551b741653d2..0000000000000 --- a/tools/arch/arm64/include/uapi/asm/bpf_perf_event.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__ -#define _UAPI__ASM_BPF_PERF_EVENT_H__ - -#include - -typedef struct user_pt_regs bpf_user_pt_regs_t; - -#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */ diff --git a/tools/arch/s390/include/uapi/asm/bpf_perf_event.h b/tools/arch/s390/include/uapi/asm/bpf_perf_event.h deleted file mode 100644 index 0a8e37a519f25..0000000000000 --- a/tools/arch/s390/include/uapi/asm/bpf_perf_event.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__ -#define _UAPI__ASM_BPF_PERF_EVENT_H__ - -#include "ptrace.h" - -typedef user_pt_regs bpf_user_pt_regs_t; - -#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */ diff --git a/tools/arch/s390/include/uapi/asm/ptrace.h b/tools/arch/s390/include/uapi/asm/ptrace.h deleted file mode 100644 index ad64d673b5e6b..0000000000000 --- a/tools/arch/s390/include/uapi/asm/ptrace.h +++ /dev/null @@ -1,458 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * S390 version - * Copyright IBM Corp. 1999, 2000 - * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) - */ - -#ifndef _UAPI_S390_PTRACE_H -#define _UAPI_S390_PTRACE_H - -/* - * Offsets in the user_regs_struct. They are used for the ptrace - * system call and in entry.S - */ -#ifndef __s390x__ - -#define PT_PSWMASK 0x00 -#define PT_PSWADDR 0x04 -#define PT_GPR0 0x08 -#define PT_GPR1 0x0C -#define PT_GPR2 0x10 -#define PT_GPR3 0x14 -#define PT_GPR4 0x18 -#define PT_GPR5 0x1C -#define PT_GPR6 0x20 -#define PT_GPR7 0x24 -#define PT_GPR8 0x28 -#define PT_GPR9 0x2C -#define PT_GPR10 0x30 -#define PT_GPR11 0x34 -#define PT_GPR12 0x38 -#define PT_GPR13 0x3C -#define PT_GPR14 0x40 -#define PT_GPR15 0x44 -#define PT_ACR0 0x48 -#define PT_ACR1 0x4C -#define PT_ACR2 0x50 -#define PT_ACR3 0x54 -#define PT_ACR4 0x58 -#define PT_ACR5 0x5C -#define PT_ACR6 0x60 -#define PT_ACR7 0x64 -#define PT_ACR8 0x68 -#define PT_ACR9 0x6C -#define PT_ACR10 0x70 -#define PT_ACR11 0x74 -#define PT_ACR12 0x78 -#define PT_ACR13 0x7C -#define PT_ACR14 0x80 -#define PT_ACR15 0x84 -#define PT_ORIGGPR2 0x88 -#define PT_FPC 0x90 -/* - * A nasty fact of life that the ptrace api - * only supports passing of longs. 
- */ -#define PT_FPR0_HI 0x98 -#define PT_FPR0_LO 0x9C -#define PT_FPR1_HI 0xA0 -#define PT_FPR1_LO 0xA4 -#define PT_FPR2_HI 0xA8 -#define PT_FPR2_LO 0xAC -#define PT_FPR3_HI 0xB0 -#define PT_FPR3_LO 0xB4 -#define PT_FPR4_HI 0xB8 -#define PT_FPR4_LO 0xBC -#define PT_FPR5_HI 0xC0 -#define PT_FPR5_LO 0xC4 -#define PT_FPR6_HI 0xC8 -#define PT_FPR6_LO 0xCC -#define PT_FPR7_HI 0xD0 -#define PT_FPR7_LO 0xD4 -#define PT_FPR8_HI 0xD8 -#define PT_FPR8_LO 0XDC -#define PT_FPR9_HI 0xE0 -#define PT_FPR9_LO 0xE4 -#define PT_FPR10_HI 0xE8 -#define PT_FPR10_LO 0xEC -#define PT_FPR11_HI 0xF0 -#define PT_FPR11_LO 0xF4 -#define PT_FPR12_HI 0xF8 -#define PT_FPR12_LO 0xFC -#define PT_FPR13_HI 0x100 -#define PT_FPR13_LO 0x104 -#define PT_FPR14_HI 0x108 -#define PT_FPR14_LO 0x10C -#define PT_FPR15_HI 0x110 -#define PT_FPR15_LO 0x114 -#define PT_CR_9 0x118 -#define PT_CR_10 0x11C -#define PT_CR_11 0x120 -#define PT_IEEE_IP 0x13C -#define PT_LASTOFF PT_IEEE_IP -#define PT_ENDREGS 0x140-1 - -#define GPR_SIZE 4 -#define CR_SIZE 4 - -#define STACK_FRAME_OVERHEAD 96 /* size of minimum stack frame */ - -#else /* __s390x__ */ - -#define PT_PSWMASK 0x00 -#define PT_PSWADDR 0x08 -#define PT_GPR0 0x10 -#define PT_GPR1 0x18 -#define PT_GPR2 0x20 -#define PT_GPR3 0x28 -#define PT_GPR4 0x30 -#define PT_GPR5 0x38 -#define PT_GPR6 0x40 -#define PT_GPR7 0x48 -#define PT_GPR8 0x50 -#define PT_GPR9 0x58 -#define PT_GPR10 0x60 -#define PT_GPR11 0x68 -#define PT_GPR12 0x70 -#define PT_GPR13 0x78 -#define PT_GPR14 0x80 -#define PT_GPR15 0x88 -#define PT_ACR0 0x90 -#define PT_ACR1 0x94 -#define PT_ACR2 0x98 -#define PT_ACR3 0x9C -#define PT_ACR4 0xA0 -#define PT_ACR5 0xA4 -#define PT_ACR6 0xA8 -#define PT_ACR7 0xAC -#define PT_ACR8 0xB0 -#define PT_ACR9 0xB4 -#define PT_ACR10 0xB8 -#define PT_ACR11 0xBC -#define PT_ACR12 0xC0 -#define PT_ACR13 0xC4 -#define PT_ACR14 0xC8 -#define PT_ACR15 0xCC -#define PT_ORIGGPR2 0xD0 -#define PT_FPC 0xD8 -#define PT_FPR0 0xE0 -#define PT_FPR1 0xE8 -#define PT_FPR2 0xF0 -#define PT_FPR3 0xF8 -#define PT_FPR4 0x100 -#define PT_FPR5 0x108 -#define PT_FPR6 0x110 -#define PT_FPR7 0x118 -#define PT_FPR8 0x120 -#define PT_FPR9 0x128 -#define PT_FPR10 0x130 -#define PT_FPR11 0x138 -#define PT_FPR12 0x140 -#define PT_FPR13 0x148 -#define PT_FPR14 0x150 -#define PT_FPR15 0x158 -#define PT_CR_9 0x160 -#define PT_CR_10 0x168 -#define PT_CR_11 0x170 -#define PT_IEEE_IP 0x1A8 -#define PT_LASTOFF PT_IEEE_IP -#define PT_ENDREGS 0x1B0-1 - -#define GPR_SIZE 8 -#define CR_SIZE 8 - -#define STACK_FRAME_OVERHEAD 160 /* size of minimum stack frame */ - -#endif /* __s390x__ */ - -#define NUM_GPRS 16 -#define NUM_FPRS 16 -#define NUM_CRS 16 -#define NUM_ACRS 16 - -#define NUM_CR_WORDS 3 - -#define FPR_SIZE 8 -#define FPC_SIZE 4 -#define FPC_PAD_SIZE 4 /* gcc insists on aligning the fpregs */ -#define ACR_SIZE 4 - - -#define PTRACE_OLDSETOPTIONS 21 -#define PTRACE_SYSEMU 31 -#define PTRACE_SYSEMU_SINGLESTEP 32 -#ifndef __ASSEMBLY__ -#include -#include - -typedef union { - float f; - double d; - __u64 ui; - struct - { - __u32 hi; - __u32 lo; - } fp; -} freg_t; - -typedef struct { - __u32 fpc; - __u32 pad; - freg_t fprs[NUM_FPRS]; -} s390_fp_regs; - -#define FPC_EXCEPTION_MASK 0xF8000000 -#define FPC_FLAGS_MASK 0x00F80000 -#define FPC_DXC_MASK 0x0000FF00 -#define FPC_RM_MASK 0x00000003 - -/* this typedef defines how a Program Status Word looks like */ -typedef struct { - unsigned long mask; - unsigned long addr; -} __attribute__ ((aligned(8))) psw_t; - -#ifndef __s390x__ - -#define PSW_MASK_PER 0x40000000UL -#define 
PSW_MASK_DAT 0x04000000UL -#define PSW_MASK_IO 0x02000000UL -#define PSW_MASK_EXT 0x01000000UL -#define PSW_MASK_KEY 0x00F00000UL -#define PSW_MASK_BASE 0x00080000UL /* always one */ -#define PSW_MASK_MCHECK 0x00040000UL -#define PSW_MASK_WAIT 0x00020000UL -#define PSW_MASK_PSTATE 0x00010000UL -#define PSW_MASK_ASC 0x0000C000UL -#define PSW_MASK_CC 0x00003000UL -#define PSW_MASK_PM 0x00000F00UL -#define PSW_MASK_RI 0x00000000UL -#define PSW_MASK_EA 0x00000000UL -#define PSW_MASK_BA 0x00000000UL - -#define PSW_MASK_USER 0x0000FF00UL - -#define PSW_ADDR_AMODE 0x80000000UL -#define PSW_ADDR_INSN 0x7FFFFFFFUL - -#define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 20) - -#define PSW_ASC_PRIMARY 0x00000000UL -#define PSW_ASC_ACCREG 0x00004000UL -#define PSW_ASC_SECONDARY 0x00008000UL -#define PSW_ASC_HOME 0x0000C000UL - -#else /* __s390x__ */ - -#define PSW_MASK_PER 0x4000000000000000UL -#define PSW_MASK_DAT 0x0400000000000000UL -#define PSW_MASK_IO 0x0200000000000000UL -#define PSW_MASK_EXT 0x0100000000000000UL -#define PSW_MASK_BASE 0x0000000000000000UL -#define PSW_MASK_KEY 0x00F0000000000000UL -#define PSW_MASK_MCHECK 0x0004000000000000UL -#define PSW_MASK_WAIT 0x0002000000000000UL -#define PSW_MASK_PSTATE 0x0001000000000000UL -#define PSW_MASK_ASC 0x0000C00000000000UL -#define PSW_MASK_CC 0x0000300000000000UL -#define PSW_MASK_PM 0x00000F0000000000UL -#define PSW_MASK_RI 0x0000008000000000UL -#define PSW_MASK_EA 0x0000000100000000UL -#define PSW_MASK_BA 0x0000000080000000UL - -#define PSW_MASK_USER 0x0000FF0180000000UL - -#define PSW_ADDR_AMODE 0x0000000000000000UL -#define PSW_ADDR_INSN 0xFFFFFFFFFFFFFFFFUL - -#define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 52) - -#define PSW_ASC_PRIMARY 0x0000000000000000UL -#define PSW_ASC_ACCREG 0x0000400000000000UL -#define PSW_ASC_SECONDARY 0x0000800000000000UL -#define PSW_ASC_HOME 0x0000C00000000000UL - -#endif /* __s390x__ */ - - -/* - * The s390_regs structure is used to define the elf_gregset_t. - */ -typedef struct { - psw_t psw; - unsigned long gprs[NUM_GPRS]; - unsigned int acrs[NUM_ACRS]; - unsigned long orig_gpr2; -} s390_regs; - -/* - * The user_pt_regs structure exports the beginning of - * the in-kernel pt_regs structure to user space. - */ -typedef struct { - unsigned long args[1]; - psw_t psw; - unsigned long gprs[NUM_GPRS]; -} user_pt_regs; - -/* - * Now for the user space program event recording (trace) definitions. - * The following structures are used only for the ptrace interface, don't - * touch or even look at it if you don't want to modify the user-space - * ptrace interface. In particular stay away from it for in-kernel PER. - */ -typedef struct { - unsigned long cr[NUM_CR_WORDS]; -} per_cr_words; - -#define PER_EM_MASK 0xE8000000UL - -typedef struct { -#ifdef __s390x__ - unsigned : 32; -#endif /* __s390x__ */ - unsigned em_branching : 1; - unsigned em_instruction_fetch : 1; - /* - * Switching on storage alteration automatically fixes - * the storage alteration event bit in the users std. 
- */ - unsigned em_storage_alteration : 1; - unsigned em_gpr_alt_unused : 1; - unsigned em_store_real_address : 1; - unsigned : 3; - unsigned branch_addr_ctl : 1; - unsigned : 1; - unsigned storage_alt_space_ctl : 1; - unsigned : 21; - unsigned long starting_addr; - unsigned long ending_addr; -} per_cr_bits; - -typedef struct { - unsigned short perc_atmid; - unsigned long address; - unsigned char access_id; -} per_lowcore_words; - -typedef struct { - unsigned perc_branching : 1; - unsigned perc_instruction_fetch : 1; - unsigned perc_storage_alteration : 1; - unsigned perc_gpr_alt_unused : 1; - unsigned perc_store_real_address : 1; - unsigned : 3; - unsigned atmid_psw_bit_31 : 1; - unsigned atmid_validity_bit : 1; - unsigned atmid_psw_bit_32 : 1; - unsigned atmid_psw_bit_5 : 1; - unsigned atmid_psw_bit_16 : 1; - unsigned atmid_psw_bit_17 : 1; - unsigned si : 2; - unsigned long address; - unsigned : 4; - unsigned access_id : 4; -} per_lowcore_bits; - -typedef struct { - union { - per_cr_words words; - per_cr_bits bits; - } control_regs; - /* - * The single_step and instruction_fetch bits are obsolete, - * the kernel always sets them to zero. To enable single - * stepping use ptrace(PTRACE_SINGLESTEP) instead. - */ - unsigned single_step : 1; - unsigned instruction_fetch : 1; - unsigned : 30; - /* - * These addresses are copied into cr10 & cr11 if single - * stepping is switched off - */ - unsigned long starting_addr; - unsigned long ending_addr; - union { - per_lowcore_words words; - per_lowcore_bits bits; - } lowcore; -} per_struct; - -typedef struct { - unsigned int len; - unsigned long kernel_addr; - unsigned long process_addr; -} ptrace_area; - -/* - * S/390 specific non posix ptrace requests. I chose unusual values so - * they are unlikely to clash with future ptrace definitions. - */ -#define PTRACE_PEEKUSR_AREA 0x5000 -#define PTRACE_POKEUSR_AREA 0x5001 -#define PTRACE_PEEKTEXT_AREA 0x5002 -#define PTRACE_PEEKDATA_AREA 0x5003 -#define PTRACE_POKETEXT_AREA 0x5004 -#define PTRACE_POKEDATA_AREA 0x5005 -#define PTRACE_GET_LAST_BREAK 0x5006 -#define PTRACE_PEEK_SYSTEM_CALL 0x5007 -#define PTRACE_POKE_SYSTEM_CALL 0x5008 -#define PTRACE_ENABLE_TE 0x5009 -#define PTRACE_DISABLE_TE 0x5010 -#define PTRACE_TE_ABORT_RAND 0x5011 - -/* - * The numbers chosen here are somewhat arbitrary but absolutely MUST - * not overlap with any of the number assigned in . - */ -#define PTRACE_SINGLEBLOCK 12 /* resume execution until next branch */ - -/* - * PT_PROT definition is loosely based on hppa bsd definition in - * gdb/hppab-nat.c - */ -#define PTRACE_PROT 21 - -typedef enum { - ptprot_set_access_watchpoint, - ptprot_set_write_watchpoint, - ptprot_disable_watchpoint -} ptprot_flags; - -typedef struct { - unsigned long lowaddr; - unsigned long hiaddr; - ptprot_flags prot; -} ptprot_area; - -/* Sequence of bytes for breakpoint illegal instruction. */ -#define S390_BREAKPOINT {0x0,0x1} -#define S390_BREAKPOINT_U16 ((__u16)0x0001) -#define S390_SYSCALL_OPCODE ((__u16)0x0a00) -#define S390_SYSCALL_SIZE 2 - -/* - * The user_regs_struct defines the way the user registers are - * store on the stack for signal handling. - */ -struct user_regs_struct { - psw_t psw; - unsigned long gprs[NUM_GPRS]; - unsigned int acrs[NUM_ACRS]; - unsigned long orig_gpr2; - s390_fp_regs fp_regs; - /* - * These per registers are in here so that gdb can modify them - * itself as there is no "official" ptrace interface for hardware - * watchpoints. This is the way intel does it. 
- */ - per_struct per_info; - unsigned long ieee_instruction_pointer; /* obsolete, always 0 */ -}; - -#endif /* __ASSEMBLY__ */ - -#endif /* _UAPI_S390_PTRACE_H */ -- GitLab From b539a287baaa8501b3af4f7f99aba3c0b1d822f8 Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Fri, 17 Feb 2023 16:18:32 +0100 Subject: [PATCH 0042/2078] selftests/bpf: Fix cross compilation with CLANG_CROSS_FLAGS I cross-compile my BPF selftests with the following command: CLANG_CROSS_FLAGS="--target=aarch64-linux-gnu --sysroot=/sysroot/" \ make LLVM=1 CC=clang CROSS_COMPILE=aarch64-linux-gnu- SRCARCH=arm64 (Note the use of CLANG_CROSS_FLAGS to specify a custom sysroot instead of letting clang use gcc's default sysroot) However, CLANG_CROSS_FLAGS gets propagated to host tools builds (libbpf and bpftool) and because they reference it directly in their Makefiles, they end up cross-compiling host objects which results in linking errors. This patch ensures that CLANG_CROSS_FLAGS is reset if CROSS_COMPILE isn't set (for example when reaching a BPF host tool build). Signed-off-by: Florent Revest Link: https://lore.kernel.org/r/20230217151832.27784-1-revest@chromium.org Signed-off-by: Alexei Starovoitov --- tools/scripts/Makefile.include | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 0efb8f2b33cef..ff527ac065cf8 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -108,6 +108,8 @@ endif # GCC_TOOLCHAIN_DIR endif # CLANG_CROSS_FLAGS CFLAGS += $(CLANG_CROSS_FLAGS) AFLAGS += $(CLANG_CROSS_FLAGS) +else +CLANG_CROSS_FLAGS := endif # CROSS_COMPILE # Hack to avoid type-punned warnings on old systems such as RHEL5: -- GitLab From bb035ef0cc91e115faa80187ac8886a7f1914d06 Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Sat, 18 Feb 2023 10:53:17 +0000 Subject: [PATCH 0043/2078] LoongArch: BPF: Support mixing bpf2bpf and tailcalls The current implementation already allow such mixing. Let's enable it in JIT. Signed-off-by: Hengqi Chen Link: https://lore.kernel.org/r/20230218105317.4139666-1-hengqi.chen@gmail.com Signed-off-by: Alexei Starovoitov --- arch/loongarch/net/bpf_jit.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index 288003a9f0cae..e70c846efaa13 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -1248,3 +1248,9 @@ out: return prog; } + +/* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */ +bool bpf_jit_supports_subprog_tailcalls(void) +{ + return true; +} -- GitLab From df2ccc180a2e6f6e4343ebee99dcfab4f8af2816 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Mon, 20 Feb 2023 17:37:56 +0100 Subject: [PATCH 0044/2078] bpf: Check for helper calls in check_subprogs() The condition src_reg != BPF_PSEUDO_CALL && imm == BPF_FUNC_tail_call may be satisfied by a kfunc call. This would lead to unnecessarily setting has_tail_call. Use src_reg == 0 instead. 
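For reference, a hedged sketch of the distinction the new check relies on; the helper function below is illustrative only, and the constants come from the BPF UAPI header rather than from this patch:

  #include <stdbool.h>
  #include <linux/bpf.h>

  /* For a BPF_JMP | BPF_CALL instruction, src_reg identifies the call type:
   *   0                      -> call to a BPF helper, imm is the helper id
   *   BPF_PSEUDO_CALL        -> bpf2bpf call, imm is an instruction offset
   *   BPF_PSEUDO_KFUNC_CALL  -> kfunc call, imm is a BTF id
   * so testing imm == BPF_FUNC_tail_call alone is ambiguous once kfuncs exist.
   */
  static bool insn_is_helper_tail_call(const struct bpf_insn *insn)
  {
          return insn->code == (BPF_JMP | BPF_CALL) &&
                 insn->src_reg == 0 &&
                 insn->imm == BPF_FUNC_tail_call;
  }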
Signed-off-by: Ilya Leoshkevich Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20230220163756.753713-1-iii@linux.ibm.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 741cb5107536d..5cb8b623f6397 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2479,8 +2479,8 @@ static int check_subprogs(struct bpf_verifier_env *env) u8 code = insn[i].code; if (code == (BPF_JMP | BPF_CALL) && - insn[i].imm == BPF_FUNC_tail_call && - insn[i].src_reg != BPF_PSEUDO_CALL) + insn[i].src_reg == 0 && + insn[i].imm == BPF_FUNC_tail_call) subprog[cur_subprog].has_tail_call = true; if (BPF_CLASS(code) == BPF_LD && (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND)) -- GitLab From d40c3847b485acc3522b62b020f77dcd38ca357f Mon Sep 17 00:00:00 2001 From: Pu Lehui Date: Tue, 21 Feb 2023 22:06:56 +0800 Subject: [PATCH 0045/2078] riscv, bpf: Add kfunc support for RV64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds kernel function call support for RV64. Since the offset from RV64 kernel and module functions to bpf programs is almost within the range of s32, the current infrastructure of RV64 is already sufficient for kfunc, so let's turn it on. Suggested-by: Björn Töpel Signed-off-by: Pu Lehui Acked-by: Björn Töpel Link: https://lore.kernel.org/r/20230221140656.3480496-1-pulehui@huaweicloud.com Signed-off-by: Alexei Starovoitov --- arch/riscv/net/bpf_jit_comp64.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index f5a668736c79b..a9270366dc577 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -1751,3 +1751,8 @@ void bpf_jit_build_epilogue(struct rv_jit_context *ctx) { __build_epilogue(false, ctx); } + +bool bpf_jit_supports_kfunc_call(void) +{ + return true; +} -- GitLab From 9fa02892857ae2b3b699630e5ede28f72106e7e7 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 21 Feb 2023 10:05:18 -0800 Subject: [PATCH 0046/2078] selftests/bpf: Fix BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL for empty flow label Kernel's flow dissector continues to parse the packet when the (optional) IPv6 flow label is empty even when instructed to stop (via BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL). Do the same in our reference BPF reimplementation. 
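A minimal, userspace-compilable sketch of the rule being adopted; the helper function is hypothetical, only the flag and the struct bpf_flow_keys fields come from the UAPI header:

  #include <stdbool.h>
  #include <linux/bpf.h>

  /* Stop early only when the caller asked to stop at the flow label AND a
   * non-empty label was actually dissected, matching the kernel's flow
   * dissector and the one-line bpf_flow.c change below.
   */
  static bool stop_at_flow_label(const struct bpf_flow_keys *keys)
  {
          return (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL) &&
                 keys->flow_label;
  }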
Signed-off-by: Stanislav Fomichev Acked-by: Willem de Bruijn Link: https://lore.kernel.org/r/20230221180518.2139026-1-sdf@google.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/flow_dissector.c | 24 +++++++++++++++++++ tools/testing/selftests/bpf/progs/bpf_flow.c | 2 +- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index 7acca37a3d2b5..c4773173a4e43 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -345,6 +345,30 @@ struct test tests[] = { .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL, .retval = BPF_OK, }, + { + .name = "ipv6-empty-flow-label", + .pkt.ipv6 = { + .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6), + .iph.nexthdr = IPPROTO_TCP, + .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), + .iph.flow_lbl = { 0x00, 0x00, 0x00 }, + .tcp.doff = 5, + .tcp.source = 80, + .tcp.dest = 8080, + }, + .keys = { + .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL, + .nhoff = ETH_HLEN, + .thoff = ETH_HLEN + sizeof(struct ipv6hdr), + .addr_proto = ETH_P_IPV6, + .ip_proto = IPPROTO_TCP, + .n_proto = __bpf_constant_htons(ETH_P_IPV6), + .sport = 80, + .dport = 8080, + }, + .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL, + .retval = BPF_OK, + }, { .name = "ipip-encap", .pkt.ipip = { diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c index a20c5ed5e4546..b04e092fac947 100644 --- a/tools/testing/selftests/bpf/progs/bpf_flow.c +++ b/tools/testing/selftests/bpf/progs/bpf_flow.c @@ -337,7 +337,7 @@ PROG(IPV6)(struct __sk_buff *skb) keys->ip_proto = ip6h->nexthdr; keys->flow_label = ip6_flowlabel(ip6h); - if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL) + if (keys->flow_label && keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL) return export_flow_keys(keys, BPF_OK); return parse_ipv6_proto(skb, ip6h->nexthdr); -- GitLab From 746ce767128598711a00d8df5713d4c3b3d9e9a7 Mon Sep 17 00:00:00 2001 From: Dave Thaler Date: Mon, 20 Feb 2023 22:37:42 +0000 Subject: [PATCH 0047/2078] bpf, docs: Add explanation of endianness Document the discussion from the email thread on the IETF bpf list, where it was explained that the raw format varies by endianness of the processor. Signed-off-by: Dave Thaler Acked-by: David Vernet Link: https://lore.kernel.org/r/20230220223742.1347-1-dthaler1968@googlemail.com Signed-off-by: Alexei Starovoitov --- Documentation/bpf/instruction-set.rst | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/instruction-set.rst index af515de5fc38e..01802ed9b29ba 100644 --- a/Documentation/bpf/instruction-set.rst +++ b/Documentation/bpf/instruction-set.rst @@ -38,8 +38,9 @@ eBPF has two instruction encodings: * the wide instruction encoding, which appends a second 64-bit immediate (i.e., constant) value after the basic instruction for a total of 128 bits. 
-The basic instruction encoding is as follows, where MSB and LSB mean the most significant -bits and least significant bits, respectively: +The basic instruction encoding looks as follows for a little-endian processor, +where MSB and LSB mean the most significant bits and least significant bits, +respectively: ============= ======= ======= ======= ============ 32 bits (MSB) 16 bits 4 bits 4 bits 8 bits (LSB) @@ -63,6 +64,17 @@ imm offset src_reg dst_reg opcode **opcode** operation to perform +and as follows for a big-endian processor: + +============= ======= ======= ======= ============ +32 bits (MSB) 16 bits 4 bits 4 bits 8 bits (LSB) +============= ======= ======= ======= ============ +imm offset dst_reg src_reg opcode +============= ======= ======= ======= ============ + +Multi-byte fields ('imm' and 'offset') are similarly stored in +the byte order of the processor. + Note that most instructions do not use all of the fields. Unused fields shall be cleared to zero. -- GitLab From 332ea1f697be148bd5e66475d82b5ecc5084da65 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 22 Feb 2023 15:29:12 -1000 Subject: [PATCH 0048/2078] bpf: Add bpf_cgroup_from_id() kfunc cgroup ID is an userspace-visible 64bit value uniquely identifying a given cgroup. As the IDs are used widely, it's useful to be able to look up the matching cgroups. Add bpf_cgroup_from_id(). v2: Separate out selftest into its own patch as suggested by Alexei. Signed-off-by: Tejun Heo Link: https://lore.kernel.org/r/Y/bBaG96t0/gQl9/@slm.duckdns.org Signed-off-by: Alexei Starovoitov --- Documentation/bpf/kfuncs.rst | 10 +++++++--- kernel/bpf/helpers.c | 18 ++++++++++++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst index ca96ef3f6896d..226313747be56 100644 --- a/Documentation/bpf/kfuncs.rst +++ b/Documentation/bpf/kfuncs.rst @@ -583,13 +583,17 @@ Here's an example of how it can be used: ---- -Another kfunc available for interacting with ``struct cgroup *`` objects is -bpf_cgroup_ancestor(). This allows callers to access the ancestor of a cgroup, -and return it as a cgroup kptr. +Other kfuncs available for interacting with ``struct cgroup *`` objects are +bpf_cgroup_ancestor() and bpf_cgroup_from_id(), allowing callers to access +the ancestor of a cgroup and find a cgroup by its ID, respectively. Both +return a cgroup kptr. .. kernel-doc:: kernel/bpf/helpers.c :identifiers: bpf_cgroup_ancestor +.. kernel-doc:: kernel/bpf/helpers.c + :identifiers: bpf_cgroup_from_id + Eventually, BPF should be updated to allow this to happen with a normal memory load in the program itself. This is currently not possible without more work in the verifier. bpf_cgroup_ancestor() can be used as follows: diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 5b278a38ae582..a784be6f8bac4 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2101,6 +2101,23 @@ __bpf_kfunc struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) cgroup_get(ancestor); return ancestor; } + +/** + * bpf_cgroup_from_id - Find a cgroup from its ID. A cgroup returned by this + * kfunc which is not subsequently stored in a map, must be released by calling + * bpf_cgroup_release(). + * @cgrp: The cgroup for which we're performing a lookup. + * @level: The level of ancestor to look up. 
+ */ +__bpf_kfunc struct cgroup *bpf_cgroup_from_id(u64 cgid) +{ + struct cgroup *cgrp; + + cgrp = cgroup_get_from_id(cgid); + if (IS_ERR(cgrp)) + return NULL; + return cgrp; +} #endif /* CONFIG_CGROUPS */ /** @@ -2167,6 +2184,7 @@ BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_cgroup_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_cgroup_release, KF_RELEASE) BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_TRUSTED_ARGS | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_cgroup_from_id, KF_ACQUIRE | KF_RET_NULL) #endif BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL) BTF_SET8_END(generic_btf_ids) -- GitLab From d0093aaefa35b80990c05a424dad2396ba4549d7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 22 Feb 2023 15:29:58 -1000 Subject: [PATCH 0049/2078] selftests/bpf: Add a test case for bpf_cgroup_from_id() Add a test case for bpf_cgroup_from_id. Signed-off-by: Tejun Heo Link: https://lore.kernel.org/r/Y/bBlt+tPozcQgws@slm.duckdns.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/cgrp_kfunc.c | 1 + .../selftests/bpf/progs/cgrp_kfunc_common.h | 1 + .../selftests/bpf/progs/cgrp_kfunc_success.c | 42 +++++++++++++++++++ 3 files changed, 44 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c index b3f7985c8504b..adda85f970589 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c +++ b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c @@ -84,6 +84,7 @@ static const char * const success_tests[] = { "test_cgrp_xchg_release", "test_cgrp_get_release", "test_cgrp_get_ancestors", + "test_cgrp_from_id", }; void test_cgrp_kfunc(void) diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h index 7d30855bfe786..2f8de933b957d 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h @@ -24,6 +24,7 @@ struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym; struct cgroup *bpf_cgroup_kptr_get(struct cgroup **pp) __ksym; void bpf_cgroup_release(struct cgroup *p) __ksym; struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) __ksym; +struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym; static inline struct __cgrps_kfunc_map_value *cgrps_kfunc_map_value_lookup(struct cgroup *cgrp) { diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c index 0c23ea32df9f9..42e13aebdd627 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c @@ -168,3 +168,45 @@ int BPF_PROG(test_cgrp_get_ancestors, struct cgroup *cgrp, const char *path) return 0; } + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(test_cgrp_from_id, struct cgroup *cgrp, const char *path) +{ + struct cgroup *parent, *res; + u64 parent_cgid; + + if (!is_test_kfunc_task()) + return 0; + + /* @cgrp's ID is not visible yet, let's test with the parent */ + parent = bpf_cgroup_ancestor(cgrp, cgrp->level - 1); + if (!parent) { + err = 1; + return 0; + } + + parent_cgid = parent->kn->id; + bpf_cgroup_release(parent); + + res = bpf_cgroup_from_id(parent_cgid); + if (!res) { + err = 2; + return 0; + } + + bpf_cgroup_release(res); + + if (res != parent) { + err = 3; + return 0; + } + + res = bpf_cgroup_from_id((u64)-1); + if (res) { + bpf_cgroup_release(res); + err = 4; + return 0; + } + + return 
0; +} -- GitLab From 4a51e66fe96dfde76894c4eb445ef1b14d312014 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 2 Feb 2023 21:58:10 +0100 Subject: [PATCH 0050/2078] wifi: wcn36xx: Slightly optimize PREPARE_HAL_BUF() In most (likely all) cases, INIT_HAL_MSG() is called before PREPARE_HAL_BUF(). In such cases calling memset() is useless because: msg_body.header.len = sizeof(msg_body) So, instead of writing twice the memory, we just have a sanity check to make sure that some potential trailing memory is zeroed. It even gives the opportunity to see that by itself and optimize it away. Signed-off-by: Christophe JAILLET Acked-by: Loic Poulain Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/7d8ab7fee45222cdbaf80c507525f2d3941587c1.1675371372.git.christophe.jaillet@wanadoo.fr --- drivers/net/wireless/ath/wcn36xx/smd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/wcn36xx/smd.c b/drivers/net/wireless/ath/wcn36xx/smd.c index 566f0b9c15841..17e1919d1cd82 100644 --- a/drivers/net/wireless/ath/wcn36xx/smd.c +++ b/drivers/net/wireless/ath/wcn36xx/smd.c @@ -475,8 +475,8 @@ out: #define PREPARE_HAL_BUF(send_buf, msg_body) \ do { \ - memset(send_buf, 0, msg_body.header.len); \ - memcpy(send_buf, &msg_body, sizeof(msg_body)); \ + memcpy_and_pad(send_buf, msg_body.header.len, \ + &msg_body, sizeof(msg_body), 0); \ } while (0) \ #define PREPARE_HAL_PTT_MSG_BUF(send_buf, p_msg_body) \ -- GitLab From 28013c35629647679e10cb1bbf36c3feec2af756 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Wed, 22 Feb 2023 18:40:14 +0200 Subject: [PATCH 0051/2078] wifi: ath12k: remove memset with byte count of 278528 Sparse warns: drivers/net/wireless/ath/ath12k/dp.c:1471:15: warning: memset with byte count of 278528 There's no need to use memset() here, instead call dma_alloc_coherent() with __GFP_ZERO. While at it, remove an extra line before the error handler. Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230222164014.860-1-kvalo@kernel.org --- drivers/net/wireless/ath/ath12k/dp.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/dp.c b/drivers/net/wireless/ath/ath12k/dp.c index 9926d81c5fe45..ae1645d0f42a2 100644 --- a/drivers/net/wireless/ath/ath12k/dp.c +++ b/drivers/net/wireless/ath/ath12k/dp.c @@ -1461,15 +1461,12 @@ static int ath12k_dp_reoq_lut_setup(struct ath12k_base *ab) dp->reoq_lut.vaddr = dma_alloc_coherent(ab->dev, DP_REOQ_LUT_SIZE, &dp->reoq_lut.paddr, - GFP_KERNEL); - + GFP_KERNEL | __GFP_ZERO); if (!dp->reoq_lut.vaddr) { ath12k_warn(ab, "failed to allocate memory for reoq table"); return -ENOMEM; } - memset(dp->reoq_lut.vaddr, 0, DP_REOQ_LUT_SIZE); - ath12k_hif_write32(ab, HAL_SEQ_WCSS_UMAC_REO_REG + HAL_REO1_QDESC_LUT_BASE0(ab), dp->reoq_lut.paddr); return 0; -- GitLab From b61987d37cbee3c44e80304598c60b163553926b Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 24 Feb 2023 14:13:42 +0800 Subject: [PATCH 0052/2078] selftests/bpf: move SYS() macro into the test_progs.h A lot of tests defined SYS() macro to run system calls with goto label. Let's move this macro to test_progs.h and add configurable "goto_label" as the first arg. 
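The consolidated definitions are not visible in the hunks below, so as a hedged sketch inferred from the per-file macros being removed, the shared versions in test_progs.h presumably look roughly like this:

  /* Relies on snprintf(), system() and ASSERT_OK() already being available
   * to includers of test_progs.h.
   */
  #define SYS(goto_label, fmt, ...)                                      \
          ({                                                             \
                  char cmd[1024];                                        \
                  snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__);        \
                  if (!ASSERT_OK(system(cmd), cmd))                      \
                          goto goto_label;                               \
          })

  #define SYS_NOFAIL(fmt, ...)                                           \
          ({                                                             \
                  char cmd[1024];                                        \
                  snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__);        \
                  system(cmd);                                           \
          })

Call sites then pass the local error label explicitly, e.g. SYS(fail, "ip netns add %s", NS_TEST), as the conversions below show.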
Suggested-by: Martin KaFai Lau Signed-off-by: Hangbin Liu Link: https://lore.kernel.org/r/20230224061343.506571-2-liuhangbin@gmail.com Signed-off-by: Martin KaFai Lau --- .../selftests/bpf/prog_tests/decap_sanity.c | 16 +-- .../selftests/bpf/prog_tests/empty_skb.c | 25 ++--- .../selftests/bpf/prog_tests/fib_lookup.c | 28 ++--- .../selftests/bpf/prog_tests/tc_redirect.c | 100 ++++++++---------- .../selftests/bpf/prog_tests/test_tunnel.c | 71 +++++-------- .../selftests/bpf/prog_tests/xdp_bonding.c | 40 +++---- .../bpf/prog_tests/xdp_do_redirect.c | 30 ++---- .../selftests/bpf/prog_tests/xdp_metadata.c | 23 ++-- .../selftests/bpf/prog_tests/xdp_synproxy.c | 41 ++++--- .../selftests/bpf/prog_tests/xfrm_info.c | 67 +++++------- tools/testing/selftests/bpf/test_progs.h | 15 +++ 11 files changed, 193 insertions(+), 263 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/decap_sanity.c b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c index 2853883b7cbb2..5c0ebe6ba8667 100644 --- a/tools/testing/selftests/bpf/prog_tests/decap_sanity.c +++ b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c @@ -10,14 +10,6 @@ #include "network_helpers.h" #include "decap_sanity.skel.h" -#define SYS(fmt, ...) \ - ({ \ - char cmd[1024]; \ - snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ - if (!ASSERT_OK(system(cmd), cmd)) \ - goto fail; \ - }) - #define NS_TEST "decap_sanity_ns" #define IPV6_IFACE_ADDR "face::1" #define UDP_TEST_PORT 7777 @@ -37,9 +29,9 @@ void test_decap_sanity(void) if (!ASSERT_OK_PTR(skel, "skel open_and_load")) return; - SYS("ip netns add %s", NS_TEST); - SYS("ip -net %s -6 addr add %s/128 dev lo nodad", NS_TEST, IPV6_IFACE_ADDR); - SYS("ip -net %s link set dev lo up", NS_TEST); + SYS(fail, "ip netns add %s", NS_TEST); + SYS(fail, "ip -net %s -6 addr add %s/128 dev lo nodad", NS_TEST, IPV6_IFACE_ADDR); + SYS(fail, "ip -net %s link set dev lo up", NS_TEST); nstoken = open_netns(NS_TEST); if (!ASSERT_OK_PTR(nstoken, "open_netns")) @@ -80,6 +72,6 @@ fail: bpf_tc_hook_destroy(&qdisc_hook); close_netns(nstoken); } - system("ip netns del " NS_TEST " &> /dev/null"); + SYS_NOFAIL("ip netns del " NS_TEST " &> /dev/null"); decap_sanity__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/empty_skb.c b/tools/testing/selftests/bpf/prog_tests/empty_skb.c index 32dd731e9070c..3b77d8a422dbf 100644 --- a/tools/testing/selftests/bpf/prog_tests/empty_skb.c +++ b/tools/testing/selftests/bpf/prog_tests/empty_skb.c @@ -4,11 +4,6 @@ #include #include "empty_skb.skel.h" -#define SYS(cmd) ({ \ - if (!ASSERT_OK(system(cmd), (cmd))) \ - goto out; \ -}) - void test_empty_skb(void) { LIBBPF_OPTS(bpf_test_run_opts, tattr); @@ -93,18 +88,18 @@ void test_empty_skb(void) }, }; - SYS("ip netns add empty_skb"); + SYS(out, "ip netns add empty_skb"); tok = open_netns("empty_skb"); - SYS("ip link add veth0 type veth peer veth1"); - SYS("ip link set dev veth0 up"); - SYS("ip link set dev veth1 up"); - SYS("ip addr add 10.0.0.1/8 dev veth0"); - SYS("ip addr add 10.0.0.2/8 dev veth1"); + SYS(out, "ip link add veth0 type veth peer veth1"); + SYS(out, "ip link set dev veth0 up"); + SYS(out, "ip link set dev veth1 up"); + SYS(out, "ip addr add 10.0.0.1/8 dev veth0"); + SYS(out, "ip addr add 10.0.0.2/8 dev veth1"); veth_ifindex = if_nametoindex("veth0"); - SYS("ip link add ipip0 type ipip local 10.0.0.1 remote 10.0.0.2"); - SYS("ip link set ipip0 up"); - SYS("ip addr add 192.168.1.1/16 dev ipip0"); + SYS(out, "ip link add ipip0 type ipip local 10.0.0.1 remote 10.0.0.2"); + SYS(out, "ip link set 
ipip0 up"); + SYS(out, "ip addr add 192.168.1.1/16 dev ipip0"); ipip_ifindex = if_nametoindex("ipip0"); bpf_obj = empty_skb__open_and_load(); @@ -142,5 +137,5 @@ out: empty_skb__destroy(bpf_obj); if (tok) close_netns(tok); - system("ip netns del empty_skb"); + SYS_NOFAIL("ip netns del empty_skb"); } diff --git a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c index 61ccddccf485e..429393caf6122 100644 --- a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c @@ -8,14 +8,6 @@ #include "network_helpers.h" #include "fib_lookup.skel.h" -#define SYS(fmt, ...) \ - ({ \ - char cmd[1024]; \ - snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ - if (!ASSERT_OK(system(cmd), cmd)) \ - goto fail; \ - }) - #define NS_TEST "fib_lookup_ns" #define IPV6_IFACE_ADDR "face::face" #define IPV6_NUD_FAILED_ADDR "face::1" @@ -59,16 +51,16 @@ static int setup_netns(void) { int err; - SYS("ip link add veth1 type veth peer name veth2"); - SYS("ip link set dev veth1 up"); + SYS(fail, "ip link add veth1 type veth peer name veth2"); + SYS(fail, "ip link set dev veth1 up"); - SYS("ip addr add %s/64 dev veth1 nodad", IPV6_IFACE_ADDR); - SYS("ip neigh add %s dev veth1 nud failed", IPV6_NUD_FAILED_ADDR); - SYS("ip neigh add %s dev veth1 lladdr %s nud stale", IPV6_NUD_STALE_ADDR, DMAC); + SYS(fail, "ip addr add %s/64 dev veth1 nodad", IPV6_IFACE_ADDR); + SYS(fail, "ip neigh add %s dev veth1 nud failed", IPV6_NUD_FAILED_ADDR); + SYS(fail, "ip neigh add %s dev veth1 lladdr %s nud stale", IPV6_NUD_STALE_ADDR, DMAC); - SYS("ip addr add %s/24 dev veth1 nodad", IPV4_IFACE_ADDR); - SYS("ip neigh add %s dev veth1 nud failed", IPV4_NUD_FAILED_ADDR); - SYS("ip neigh add %s dev veth1 lladdr %s nud stale", IPV4_NUD_STALE_ADDR, DMAC); + SYS(fail, "ip addr add %s/24 dev veth1 nodad", IPV4_IFACE_ADDR); + SYS(fail, "ip neigh add %s dev veth1 nud failed", IPV4_NUD_FAILED_ADDR); + SYS(fail, "ip neigh add %s dev veth1 lladdr %s nud stale", IPV4_NUD_STALE_ADDR, DMAC); err = write_sysctl("/proc/sys/net/ipv4/conf/veth1/forwarding", "1"); if (!ASSERT_OK(err, "write_sysctl(net.ipv4.conf.veth1.forwarding)")) @@ -140,7 +132,7 @@ void test_fib_lookup(void) return; prog_fd = bpf_program__fd(skel->progs.fib_lookup); - SYS("ip netns add %s", NS_TEST); + SYS(fail, "ip netns add %s", NS_TEST); nstoken = open_netns(NS_TEST); if (!ASSERT_OK_PTR(nstoken, "open_netns")) @@ -182,6 +174,6 @@ void test_fib_lookup(void) fail: if (nstoken) close_netns(nstoken); - system("ip netns del " NS_TEST " &> /dev/null"); + SYS_NOFAIL("ip netns del " NS_TEST " &> /dev/null"); fib_lookup__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index bca5e6839ac48..6ee22c3b251ad 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -137,24 +137,16 @@ static int get_ifaddr(const char *name, char *ifaddr) return 0; } -#define SYS(fmt, ...) 
\ - ({ \ - char cmd[1024]; \ - snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ - if (!ASSERT_OK(system(cmd), cmd)) \ - goto fail; \ - }) - static int netns_setup_links_and_routes(struct netns_setup_result *result) { struct nstoken *nstoken = NULL; char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {}; - SYS("ip link add veth_src type veth peer name veth_src_fwd"); - SYS("ip link add veth_dst type veth peer name veth_dst_fwd"); + SYS(fail, "ip link add veth_src type veth peer name veth_src_fwd"); + SYS(fail, "ip link add veth_dst type veth peer name veth_dst_fwd"); - SYS("ip link set veth_dst_fwd address " MAC_DST_FWD); - SYS("ip link set veth_dst address " MAC_DST); + SYS(fail, "ip link set veth_dst_fwd address " MAC_DST_FWD); + SYS(fail, "ip link set veth_dst address " MAC_DST); if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr)) goto fail; @@ -175,27 +167,27 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) if (!ASSERT_GT(result->ifindex_veth_dst_fwd, 0, "ifindex_veth_dst_fwd")) goto fail; - SYS("ip link set veth_src netns " NS_SRC); - SYS("ip link set veth_src_fwd netns " NS_FWD); - SYS("ip link set veth_dst_fwd netns " NS_FWD); - SYS("ip link set veth_dst netns " NS_DST); + SYS(fail, "ip link set veth_src netns " NS_SRC); + SYS(fail, "ip link set veth_src_fwd netns " NS_FWD); + SYS(fail, "ip link set veth_dst_fwd netns " NS_FWD); + SYS(fail, "ip link set veth_dst netns " NS_DST); /** setup in 'src' namespace */ nstoken = open_netns(NS_SRC); if (!ASSERT_OK_PTR(nstoken, "setns src")) goto fail; - SYS("ip addr add " IP4_SRC "/32 dev veth_src"); - SYS("ip addr add " IP6_SRC "/128 dev veth_src nodad"); - SYS("ip link set dev veth_src up"); + SYS(fail, "ip addr add " IP4_SRC "/32 dev veth_src"); + SYS(fail, "ip addr add " IP6_SRC "/128 dev veth_src nodad"); + SYS(fail, "ip link set dev veth_src up"); - SYS("ip route add " IP4_DST "/32 dev veth_src scope global"); - SYS("ip route add " IP4_NET "/16 dev veth_src scope global"); - SYS("ip route add " IP6_DST "/128 dev veth_src scope global"); + SYS(fail, "ip route add " IP4_DST "/32 dev veth_src scope global"); + SYS(fail, "ip route add " IP4_NET "/16 dev veth_src scope global"); + SYS(fail, "ip route add " IP6_DST "/128 dev veth_src scope global"); - SYS("ip neigh add " IP4_DST " dev veth_src lladdr %s", + SYS(fail, "ip neigh add " IP4_DST " dev veth_src lladdr %s", veth_src_fwd_addr); - SYS("ip neigh add " IP6_DST " dev veth_src lladdr %s", + SYS(fail, "ip neigh add " IP6_DST " dev veth_src lladdr %s", veth_src_fwd_addr); close_netns(nstoken); @@ -209,15 +201,15 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) * needs v4 one in order to start ARP probing. IP4_NET route is added * to the endpoints so that the ARP processing will reply. 
*/ - SYS("ip addr add " IP4_SLL "/32 dev veth_src_fwd"); - SYS("ip addr add " IP4_DLL "/32 dev veth_dst_fwd"); - SYS("ip link set dev veth_src_fwd up"); - SYS("ip link set dev veth_dst_fwd up"); + SYS(fail, "ip addr add " IP4_SLL "/32 dev veth_src_fwd"); + SYS(fail, "ip addr add " IP4_DLL "/32 dev veth_dst_fwd"); + SYS(fail, "ip link set dev veth_src_fwd up"); + SYS(fail, "ip link set dev veth_dst_fwd up"); - SYS("ip route add " IP4_SRC "/32 dev veth_src_fwd scope global"); - SYS("ip route add " IP6_SRC "/128 dev veth_src_fwd scope global"); - SYS("ip route add " IP4_DST "/32 dev veth_dst_fwd scope global"); - SYS("ip route add " IP6_DST "/128 dev veth_dst_fwd scope global"); + SYS(fail, "ip route add " IP4_SRC "/32 dev veth_src_fwd scope global"); + SYS(fail, "ip route add " IP6_SRC "/128 dev veth_src_fwd scope global"); + SYS(fail, "ip route add " IP4_DST "/32 dev veth_dst_fwd scope global"); + SYS(fail, "ip route add " IP6_DST "/128 dev veth_dst_fwd scope global"); close_netns(nstoken); @@ -226,16 +218,16 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) if (!ASSERT_OK_PTR(nstoken, "setns dst")) goto fail; - SYS("ip addr add " IP4_DST "/32 dev veth_dst"); - SYS("ip addr add " IP6_DST "/128 dev veth_dst nodad"); - SYS("ip link set dev veth_dst up"); + SYS(fail, "ip addr add " IP4_DST "/32 dev veth_dst"); + SYS(fail, "ip addr add " IP6_DST "/128 dev veth_dst nodad"); + SYS(fail, "ip link set dev veth_dst up"); - SYS("ip route add " IP4_SRC "/32 dev veth_dst scope global"); - SYS("ip route add " IP4_NET "/16 dev veth_dst scope global"); - SYS("ip route add " IP6_SRC "/128 dev veth_dst scope global"); + SYS(fail, "ip route add " IP4_SRC "/32 dev veth_dst scope global"); + SYS(fail, "ip route add " IP4_NET "/16 dev veth_dst scope global"); + SYS(fail, "ip route add " IP6_SRC "/128 dev veth_dst scope global"); - SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD); - SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD); + SYS(fail, "ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD); + SYS(fail, "ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD); close_netns(nstoken); @@ -375,7 +367,7 @@ done: static int test_ping(int family, const char *addr) { - SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr); + SYS(fail, "ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr); return 0; fail: return -1; @@ -953,7 +945,7 @@ static int tun_open(char *name) if (!ASSERT_OK(err, "ioctl TUNSETIFF")) goto fail; - SYS("ip link set dev %s up", name); + SYS(fail, "ip link set dev %s up", name); return fd; fail: @@ -1076,23 +1068,23 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0); /* Setup route and neigh tables */ - SYS("ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24"); - SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24"); + SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24"); + SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24"); - SYS("ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad"); - SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad"); + SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad"); + SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad"); - SYS("ip -netns " NS_SRC " route del " 
IP4_DST "/32 dev veth_src scope global"); - SYS("ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD + SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global"); + SYS(fail, "ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD " dev tun_src scope global"); - SYS("ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global"); - SYS("ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global"); - SYS("ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD + SYS(fail, "ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global"); + SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global"); + SYS(fail, "ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD " dev tun_src scope global"); - SYS("ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global"); + SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global"); - SYS("ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD); - SYS("ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD); + SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD); + SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD); if (!ASSERT_OK(set_forwarding(false), "disable forwarding")) goto fail; diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c index 07ad457f33709..47f1d482fe390 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c +++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c @@ -91,30 +91,15 @@ #define PING_ARGS "-i 0.01 -c 3 -w 10 -q" -#define SYS(fmt, ...) \ - ({ \ - char cmd[1024]; \ - snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ - if (!ASSERT_OK(system(cmd), cmd)) \ - goto fail; \ - }) - -#define SYS_NOFAIL(fmt, ...) 
\ - ({ \ - char cmd[1024]; \ - snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ - system(cmd); \ - }) - static int config_device(void) { - SYS("ip netns add at_ns0"); - SYS("ip link add veth0 address " MAC_VETH1 " type veth peer name veth1"); - SYS("ip link set veth0 netns at_ns0"); - SYS("ip addr add " IP4_ADDR1_VETH1 "/24 dev veth1"); - SYS("ip link set dev veth1 up mtu 1500"); - SYS("ip netns exec at_ns0 ip addr add " IP4_ADDR_VETH0 "/24 dev veth0"); - SYS("ip netns exec at_ns0 ip link set dev veth0 up mtu 1500"); + SYS(fail, "ip netns add at_ns0"); + SYS(fail, "ip link add veth0 address " MAC_VETH1 " type veth peer name veth1"); + SYS(fail, "ip link set veth0 netns at_ns0"); + SYS(fail, "ip addr add " IP4_ADDR1_VETH1 "/24 dev veth1"); + SYS(fail, "ip link set dev veth1 up mtu 1500"); + SYS(fail, "ip netns exec at_ns0 ip addr add " IP4_ADDR_VETH0 "/24 dev veth0"); + SYS(fail, "ip netns exec at_ns0 ip link set dev veth0 up mtu 1500"); return 0; fail: @@ -132,23 +117,23 @@ static void cleanup(void) static int add_vxlan_tunnel(void) { /* at_ns0 namespace */ - SYS("ip netns exec at_ns0 ip link add dev %s type vxlan external gbp dstport 4789", + SYS(fail, "ip netns exec at_ns0 ip link add dev %s type vxlan external gbp dstport 4789", VXLAN_TUNL_DEV0); - SYS("ip netns exec at_ns0 ip link set dev %s address %s up", + SYS(fail, "ip netns exec at_ns0 ip link set dev %s address %s up", VXLAN_TUNL_DEV0, MAC_TUNL_DEV0); - SYS("ip netns exec at_ns0 ip addr add dev %s %s/24", + SYS(fail, "ip netns exec at_ns0 ip addr add dev %s %s/24", VXLAN_TUNL_DEV0, IP4_ADDR_TUNL_DEV0); - SYS("ip netns exec at_ns0 ip neigh add %s lladdr %s dev %s", + SYS(fail, "ip netns exec at_ns0 ip neigh add %s lladdr %s dev %s", IP4_ADDR_TUNL_DEV1, MAC_TUNL_DEV1, VXLAN_TUNL_DEV0); - SYS("ip netns exec at_ns0 ip neigh add %s lladdr %s dev veth0", + SYS(fail, "ip netns exec at_ns0 ip neigh add %s lladdr %s dev veth0", IP4_ADDR2_VETH1, MAC_VETH1); /* root namespace */ - SYS("ip link add dev %s type vxlan external gbp dstport 4789", + SYS(fail, "ip link add dev %s type vxlan external gbp dstport 4789", VXLAN_TUNL_DEV1); - SYS("ip link set dev %s address %s up", VXLAN_TUNL_DEV1, MAC_TUNL_DEV1); - SYS("ip addr add dev %s %s/24", VXLAN_TUNL_DEV1, IP4_ADDR_TUNL_DEV1); - SYS("ip neigh add %s lladdr %s dev %s", + SYS(fail, "ip link set dev %s address %s up", VXLAN_TUNL_DEV1, MAC_TUNL_DEV1); + SYS(fail, "ip addr add dev %s %s/24", VXLAN_TUNL_DEV1, IP4_ADDR_TUNL_DEV1); + SYS(fail, "ip neigh add %s lladdr %s dev %s", IP4_ADDR_TUNL_DEV0, MAC_TUNL_DEV0, VXLAN_TUNL_DEV1); return 0; @@ -165,26 +150,26 @@ static void delete_vxlan_tunnel(void) static int add_ip6vxlan_tunnel(void) { - SYS("ip netns exec at_ns0 ip -6 addr add %s/96 dev veth0", + SYS(fail, "ip netns exec at_ns0 ip -6 addr add %s/96 dev veth0", IP6_ADDR_VETH0); - SYS("ip netns exec at_ns0 ip link set dev veth0 up"); - SYS("ip -6 addr add %s/96 dev veth1", IP6_ADDR1_VETH1); - SYS("ip -6 addr add %s/96 dev veth1", IP6_ADDR2_VETH1); - SYS("ip link set dev veth1 up"); + SYS(fail, "ip netns exec at_ns0 ip link set dev veth0 up"); + SYS(fail, "ip -6 addr add %s/96 dev veth1", IP6_ADDR1_VETH1); + SYS(fail, "ip -6 addr add %s/96 dev veth1", IP6_ADDR2_VETH1); + SYS(fail, "ip link set dev veth1 up"); /* at_ns0 namespace */ - SYS("ip netns exec at_ns0 ip link add dev %s type vxlan external dstport 4789", + SYS(fail, "ip netns exec at_ns0 ip link add dev %s type vxlan external dstport 4789", IP6VXLAN_TUNL_DEV0); - SYS("ip netns exec at_ns0 ip addr add dev %s %s/24", + SYS(fail, "ip netns exec 
at_ns0 ip addr add dev %s %s/24", IP6VXLAN_TUNL_DEV0, IP4_ADDR_TUNL_DEV0); - SYS("ip netns exec at_ns0 ip link set dev %s address %s up", + SYS(fail, "ip netns exec at_ns0 ip link set dev %s address %s up", IP6VXLAN_TUNL_DEV0, MAC_TUNL_DEV0); /* root namespace */ - SYS("ip link add dev %s type vxlan external dstport 4789", + SYS(fail, "ip link add dev %s type vxlan external dstport 4789", IP6VXLAN_TUNL_DEV1); - SYS("ip addr add dev %s %s/24", IP6VXLAN_TUNL_DEV1, IP4_ADDR_TUNL_DEV1); - SYS("ip link set dev %s address %s up", + SYS(fail, "ip addr add dev %s %s/24", IP6VXLAN_TUNL_DEV1, IP4_ADDR_TUNL_DEV1); + SYS(fail, "ip link set dev %s address %s up", IP6VXLAN_TUNL_DEV1, MAC_TUNL_DEV1); return 0; @@ -205,7 +190,7 @@ static void delete_ip6vxlan_tunnel(void) static int test_ping(int family, const char *addr) { - SYS("%s %s %s > /dev/null", ping_command(family), PING_ARGS, addr); + SYS(fail, "%s %s %s > /dev/null", ping_command(family), PING_ARGS, addr); return 0; fail: return -1; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c index 5e3a26b15ec62..d19f79048ff6f 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c @@ -141,41 +141,33 @@ static const char * const xmit_policy_names[] = { static int bonding_setup(struct skeletons *skeletons, int mode, int xmit_policy, int bond_both_attach) { -#define SYS(fmt, ...) \ - ({ \ - char cmd[1024]; \ - snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ - if (!ASSERT_OK(system(cmd), cmd)) \ - return -1; \ - }) - - SYS("ip netns add ns_dst"); - SYS("ip link add veth1_1 type veth peer name veth2_1 netns ns_dst"); - SYS("ip link add veth1_2 type veth peer name veth2_2 netns ns_dst"); - - SYS("ip link add bond1 type bond mode %s xmit_hash_policy %s", + SYS(fail, "ip netns add ns_dst"); + SYS(fail, "ip link add veth1_1 type veth peer name veth2_1 netns ns_dst"); + SYS(fail, "ip link add veth1_2 type veth peer name veth2_2 netns ns_dst"); + + SYS(fail, "ip link add bond1 type bond mode %s xmit_hash_policy %s", mode_names[mode], xmit_policy_names[xmit_policy]); - SYS("ip link set bond1 up address " BOND1_MAC_STR " addrgenmode none"); - SYS("ip -netns ns_dst link add bond2 type bond mode %s xmit_hash_policy %s", + SYS(fail, "ip link set bond1 up address " BOND1_MAC_STR " addrgenmode none"); + SYS(fail, "ip -netns ns_dst link add bond2 type bond mode %s xmit_hash_policy %s", mode_names[mode], xmit_policy_names[xmit_policy]); - SYS("ip -netns ns_dst link set bond2 up address " BOND2_MAC_STR " addrgenmode none"); + SYS(fail, "ip -netns ns_dst link set bond2 up address " BOND2_MAC_STR " addrgenmode none"); - SYS("ip link set veth1_1 master bond1"); + SYS(fail, "ip link set veth1_1 master bond1"); if (bond_both_attach == BOND_BOTH_AND_ATTACH) { - SYS("ip link set veth1_2 master bond1"); + SYS(fail, "ip link set veth1_2 master bond1"); } else { - SYS("ip link set veth1_2 up addrgenmode none"); + SYS(fail, "ip link set veth1_2 up addrgenmode none"); if (xdp_attach(skeletons, skeletons->xdp_dummy->progs.xdp_dummy_prog, "veth1_2")) return -1; } - SYS("ip -netns ns_dst link set veth2_1 master bond2"); + SYS(fail, "ip -netns ns_dst link set veth2_1 master bond2"); if (bond_both_attach == BOND_BOTH_AND_ATTACH) - SYS("ip -netns ns_dst link set veth2_2 master bond2"); + SYS(fail, "ip -netns ns_dst link set veth2_2 master bond2"); else - SYS("ip -netns ns_dst link set veth2_2 up addrgenmode none"); + SYS(fail, "ip -netns ns_dst link set 
veth2_2 up addrgenmode none"); /* Load a dummy program on sending side as with veth peer needs to have a * XDP program loaded as well. @@ -194,8 +186,8 @@ static int bonding_setup(struct skeletons *skeletons, int mode, int xmit_policy, } return 0; - -#undef SYS +fail: + return -1; } static void bonding_cleanup(struct skeletons *skeletons) diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c index 2666c84dbd014..856cbc29e6a1d 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c @@ -12,14 +12,6 @@ #include #include "test_xdp_do_redirect.skel.h" -#define SYS(fmt, ...) \ - ({ \ - char cmd[1024]; \ - snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ - if (!ASSERT_OK(system(cmd), cmd)) \ - goto out; \ - }) - struct udp_packet { struct ethhdr eth; struct ipv6hdr iph; @@ -126,19 +118,19 @@ void test_xdp_do_redirect(void) * iface and NUM_PKTS-2 in the TC hook. We match the packets on the UDP * payload. */ - SYS("ip netns add testns"); + SYS(out, "ip netns add testns"); nstoken = open_netns("testns"); if (!ASSERT_OK_PTR(nstoken, "setns")) goto out; - SYS("ip link add veth_src type veth peer name veth_dst"); - SYS("ip link set dev veth_src address 00:11:22:33:44:55"); - SYS("ip link set dev veth_dst address 66:77:88:99:aa:bb"); - SYS("ip link set dev veth_src up"); - SYS("ip link set dev veth_dst up"); - SYS("ip addr add dev veth_src fc00::1/64"); - SYS("ip addr add dev veth_dst fc00::2/64"); - SYS("ip neigh add fc00::2 dev veth_src lladdr 66:77:88:99:aa:bb"); + SYS(out, "ip link add veth_src type veth peer name veth_dst"); + SYS(out, "ip link set dev veth_src address 00:11:22:33:44:55"); + SYS(out, "ip link set dev veth_dst address 66:77:88:99:aa:bb"); + SYS(out, "ip link set dev veth_src up"); + SYS(out, "ip link set dev veth_dst up"); + SYS(out, "ip addr add dev veth_src fc00::1/64"); + SYS(out, "ip addr add dev veth_dst fc00::2/64"); + SYS(out, "ip neigh add fc00::2 dev veth_src lladdr 66:77:88:99:aa:bb"); /* We enable forwarding in the test namespace because that will cause * the packets that go through the kernel stack (with XDP_PASS) to be @@ -151,7 +143,7 @@ void test_xdp_do_redirect(void) * code didn't have this, so we keep the test behaviour to make sure the * bug doesn't resurface. */ - SYS("sysctl -qw net.ipv6.conf.all.forwarding=1"); + SYS(out, "sysctl -qw net.ipv6.conf.all.forwarding=1"); ifindex_src = if_nametoindex("veth_src"); ifindex_dst = if_nametoindex("veth_dst"); @@ -225,6 +217,6 @@ out_tc: out: if (nstoken) close_netns(nstoken); - system("ip netns del testns"); + SYS_NOFAIL("ip netns del testns"); test_xdp_do_redirect__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c index aa4beae99f4f6..490e851dc27df 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c @@ -34,11 +34,6 @@ #define PREFIX_LEN "8" #define FAMILY AF_INET -#define SYS(cmd) ({ \ - if (!ASSERT_OK(system(cmd), (cmd))) \ - goto out; \ -}) - struct xsk { void *umem_area; struct xsk_umem *umem; @@ -298,16 +293,16 @@ void test_xdp_metadata(void) /* Setup new networking namespace, with a veth pair. 
*/ - SYS("ip netns add xdp_metadata"); + SYS(out, "ip netns add xdp_metadata"); tok = open_netns("xdp_metadata"); - SYS("ip link add numtxqueues 1 numrxqueues 1 " TX_NAME + SYS(out, "ip link add numtxqueues 1 numrxqueues 1 " TX_NAME " type veth peer " RX_NAME " numtxqueues 1 numrxqueues 1"); - SYS("ip link set dev " TX_NAME " address 00:00:00:00:00:01"); - SYS("ip link set dev " RX_NAME " address 00:00:00:00:00:02"); - SYS("ip link set dev " TX_NAME " up"); - SYS("ip link set dev " RX_NAME " up"); - SYS("ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME); - SYS("ip addr add " RX_ADDR "/" PREFIX_LEN " dev " RX_NAME); + SYS(out, "ip link set dev " TX_NAME " address 00:00:00:00:00:01"); + SYS(out, "ip link set dev " RX_NAME " address 00:00:00:00:00:02"); + SYS(out, "ip link set dev " TX_NAME " up"); + SYS(out, "ip link set dev " RX_NAME " up"); + SYS(out, "ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME); + SYS(out, "ip addr add " RX_ADDR "/" PREFIX_LEN " dev " RX_NAME); rx_ifindex = if_nametoindex(RX_NAME); tx_ifindex = if_nametoindex(TX_NAME); @@ -405,5 +400,5 @@ out: xdp_metadata__destroy(bpf_obj); if (tok) close_netns(tok); - system("ip netns del xdp_metadata"); + SYS_NOFAIL("ip netns del xdp_metadata"); } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c index c72083885b6d7..8b50a992d233b 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c @@ -8,11 +8,6 @@ #define CMD_OUT_BUF_SIZE 1023 -#define SYS(cmd) ({ \ - if (!ASSERT_OK(system(cmd), (cmd))) \ - goto out; \ -}) - #define SYS_OUT(cmd, ...) ({ \ char buf[1024]; \ snprintf(buf, sizeof(buf), (cmd), ##__VA_ARGS__); \ @@ -69,37 +64,37 @@ static void test_synproxy(bool xdp) char buf[CMD_OUT_BUF_SIZE]; size_t size; - SYS("ip netns add synproxy"); + SYS(out, "ip netns add synproxy"); - SYS("ip link add tmp0 type veth peer name tmp1"); - SYS("ip link set tmp1 netns synproxy"); - SYS("ip link set tmp0 up"); - SYS("ip addr replace 198.18.0.1/24 dev tmp0"); + SYS(out, "ip link add tmp0 type veth peer name tmp1"); + SYS(out, "ip link set tmp1 netns synproxy"); + SYS(out, "ip link set tmp0 up"); + SYS(out, "ip addr replace 198.18.0.1/24 dev tmp0"); /* When checksum offload is enabled, the XDP program sees wrong * checksums and drops packets. */ - SYS("ethtool -K tmp0 tx off"); + SYS(out, "ethtool -K tmp0 tx off"); if (xdp) /* Workaround required for veth. 
*/ - SYS("ip link set tmp0 xdp object xdp_dummy.bpf.o section xdp 2> /dev/null"); + SYS(out, "ip link set tmp0 xdp object xdp_dummy.bpf.o section xdp 2> /dev/null"); ns = open_netns("synproxy"); if (!ASSERT_OK_PTR(ns, "setns")) goto out; - SYS("ip link set lo up"); - SYS("ip link set tmp1 up"); - SYS("ip addr replace 198.18.0.2/24 dev tmp1"); - SYS("sysctl -w net.ipv4.tcp_syncookies=2"); - SYS("sysctl -w net.ipv4.tcp_timestamps=1"); - SYS("sysctl -w net.netfilter.nf_conntrack_tcp_loose=0"); - SYS("iptables-legacy -t raw -I PREROUTING \ + SYS(out, "ip link set lo up"); + SYS(out, "ip link set tmp1 up"); + SYS(out, "ip addr replace 198.18.0.2/24 dev tmp1"); + SYS(out, "sysctl -w net.ipv4.tcp_syncookies=2"); + SYS(out, "sysctl -w net.ipv4.tcp_timestamps=1"); + SYS(out, "sysctl -w net.netfilter.nf_conntrack_tcp_loose=0"); + SYS(out, "iptables-legacy -t raw -I PREROUTING \ -i tmp1 -p tcp -m tcp --syn --dport 8080 -j CT --notrack"); - SYS("iptables-legacy -t filter -A INPUT \ + SYS(out, "iptables-legacy -t filter -A INPUT \ -i tmp1 -p tcp -m tcp --dport 8080 -m state --state INVALID,UNTRACKED \ -j SYNPROXY --sack-perm --timestamp --wscale 7 --mss 1460"); - SYS("iptables-legacy -t filter -A INPUT \ + SYS(out, "iptables-legacy -t filter -A INPUT \ -i tmp1 -m state --state INVALID -j DROP"); ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --ports 8080 \ @@ -170,8 +165,8 @@ out: if (ns) close_netns(ns); - system("ip link del tmp0"); - system("ip netns del synproxy"); + SYS_NOFAIL("ip link del tmp0"); + SYS_NOFAIL("ip netns del synproxy"); } void test_xdp_synproxy(void) diff --git a/tools/testing/selftests/bpf/prog_tests/xfrm_info.c b/tools/testing/selftests/bpf/prog_tests/xfrm_info.c index 8b03c9bb48625..d37f5394e199e 100644 --- a/tools/testing/selftests/bpf/prog_tests/xfrm_info.c +++ b/tools/testing/selftests/bpf/prog_tests/xfrm_info.c @@ -69,21 +69,6 @@ "proto esp aead 'rfc4106(gcm(aes))' " \ "0xe4d8f4b4da1df18a3510b3781496daa82488b713 128 mode tunnel " -#define SYS(fmt, ...) \ - ({ \ - char cmd[1024]; \ - snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ - if (!ASSERT_OK(system(cmd), cmd)) \ - goto fail; \ - }) - -#define SYS_NOFAIL(fmt, ...) 
\ - ({ \ - char cmd[1024]; \ - snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ - system(cmd); \ - }) - static int attach_tc_prog(struct bpf_tc_hook *hook, int igr_fd, int egr_fd) { LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1, .priority = 1, @@ -126,23 +111,23 @@ static void cleanup(void) static int config_underlay(void) { - SYS("ip netns add " NS0); - SYS("ip netns add " NS1); - SYS("ip netns add " NS2); + SYS(fail, "ip netns add " NS0); + SYS(fail, "ip netns add " NS1); + SYS(fail, "ip netns add " NS2); /* NS0 <-> NS1 [veth01 <-> veth10] */ - SYS("ip link add veth01 netns " NS0 " type veth peer name veth10 netns " NS1); - SYS("ip -net " NS0 " addr add " IP4_ADDR_VETH01 "/24 dev veth01"); - SYS("ip -net " NS0 " link set dev veth01 up"); - SYS("ip -net " NS1 " addr add " IP4_ADDR_VETH10 "/24 dev veth10"); - SYS("ip -net " NS1 " link set dev veth10 up"); + SYS(fail, "ip link add veth01 netns " NS0 " type veth peer name veth10 netns " NS1); + SYS(fail, "ip -net " NS0 " addr add " IP4_ADDR_VETH01 "/24 dev veth01"); + SYS(fail, "ip -net " NS0 " link set dev veth01 up"); + SYS(fail, "ip -net " NS1 " addr add " IP4_ADDR_VETH10 "/24 dev veth10"); + SYS(fail, "ip -net " NS1 " link set dev veth10 up"); /* NS0 <-> NS2 [veth02 <-> veth20] */ - SYS("ip link add veth02 netns " NS0 " type veth peer name veth20 netns " NS2); - SYS("ip -net " NS0 " addr add " IP4_ADDR_VETH02 "/24 dev veth02"); - SYS("ip -net " NS0 " link set dev veth02 up"); - SYS("ip -net " NS2 " addr add " IP4_ADDR_VETH20 "/24 dev veth20"); - SYS("ip -net " NS2 " link set dev veth20 up"); + SYS(fail, "ip link add veth02 netns " NS0 " type veth peer name veth20 netns " NS2); + SYS(fail, "ip -net " NS0 " addr add " IP4_ADDR_VETH02 "/24 dev veth02"); + SYS(fail, "ip -net " NS0 " link set dev veth02 up"); + SYS(fail, "ip -net " NS2 " addr add " IP4_ADDR_VETH20 "/24 dev veth20"); + SYS(fail, "ip -net " NS2 " link set dev veth20 up"); return 0; fail: @@ -153,20 +138,20 @@ static int setup_xfrm_tunnel_ns(const char *ns, const char *ipv4_local, const char *ipv4_remote, int if_id) { /* State: local -> remote */ - SYS("ip -net %s xfrm state add src %s dst %s spi 1 " + SYS(fail, "ip -net %s xfrm state add src %s dst %s spi 1 " ESP_DUMMY_PARAMS "if_id %d", ns, ipv4_local, ipv4_remote, if_id); /* State: local <- remote */ - SYS("ip -net %s xfrm state add src %s dst %s spi 1 " + SYS(fail, "ip -net %s xfrm state add src %s dst %s spi 1 " ESP_DUMMY_PARAMS "if_id %d", ns, ipv4_remote, ipv4_local, if_id); /* Policy: local -> remote */ - SYS("ip -net %s xfrm policy add dir out src 0.0.0.0/0 dst 0.0.0.0/0 " + SYS(fail, "ip -net %s xfrm policy add dir out src 0.0.0.0/0 dst 0.0.0.0/0 " "if_id %d tmpl src %s dst %s proto esp mode tunnel if_id %d", ns, if_id, ipv4_local, ipv4_remote, if_id); /* Policy: local <- remote */ - SYS("ip -net %s xfrm policy add dir in src 0.0.0.0/0 dst 0.0.0.0/0 " + SYS(fail, "ip -net %s xfrm policy add dir in src 0.0.0.0/0 dst 0.0.0.0/0 " "if_id %d tmpl src %s dst %s proto esp mode tunnel if_id %d", ns, if_id, ipv4_remote, ipv4_local, if_id); @@ -274,16 +259,16 @@ static int config_overlay(void) if (!ASSERT_OK(setup_xfrmi_external_dev(NS0), "xfrmi")) goto fail; - SYS("ip -net " NS0 " addr add 192.168.1.100/24 dev ipsec0"); - SYS("ip -net " NS0 " link set dev ipsec0 up"); + SYS(fail, "ip -net " NS0 " addr add 192.168.1.100/24 dev ipsec0"); + SYS(fail, "ip -net " NS0 " link set dev ipsec0 up"); - SYS("ip -net " NS1 " link add ipsec0 type xfrm if_id %d", IF_ID_1); - SYS("ip -net " NS1 " addr add 192.168.1.200/24 dev ipsec0"); - 
SYS("ip -net " NS1 " link set dev ipsec0 up"); + SYS(fail, "ip -net " NS1 " link add ipsec0 type xfrm if_id %d", IF_ID_1); + SYS(fail, "ip -net " NS1 " addr add 192.168.1.200/24 dev ipsec0"); + SYS(fail, "ip -net " NS1 " link set dev ipsec0 up"); - SYS("ip -net " NS2 " link add ipsec0 type xfrm if_id %d", IF_ID_2); - SYS("ip -net " NS2 " addr add 192.168.1.200/24 dev ipsec0"); - SYS("ip -net " NS2 " link set dev ipsec0 up"); + SYS(fail, "ip -net " NS2 " link add ipsec0 type xfrm if_id %d", IF_ID_2); + SYS(fail, "ip -net " NS2 " addr add 192.168.1.200/24 dev ipsec0"); + SYS(fail, "ip -net " NS2 " link set dev ipsec0 up"); return 0; fail: @@ -294,7 +279,7 @@ static int test_xfrm_ping(struct xfrm_info *skel, u32 if_id) { skel->bss->req_if_id = if_id; - SYS("ping -i 0.01 -c 3 -w 10 -q 192.168.1.200 > /dev/null"); + SYS(fail, "ping -i 0.01 -c 3 -w 10 -q 192.168.1.200 > /dev/null"); if (!ASSERT_EQ(skel->bss->resp_if_id, if_id, "if_id")) goto fail; diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index d5d51ec97ec87..9fbdc57c5b570 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -376,6 +376,21 @@ int test__join_cgroup(const char *path); ___ok; \ }) +#define SYS(goto_label, fmt, ...) \ + ({ \ + char cmd[1024]; \ + snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ + if (!ASSERT_OK(system(cmd), cmd)) \ + goto goto_label; \ + }) + +#define SYS_NOFAIL(fmt, ...) \ + ({ \ + char cmd[1024]; \ + snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ + system(cmd); \ + }) + static inline __u64 ptr_to_u64(const void *ptr) { return (__u64) (unsigned long) ptr; -- GitLab From 02d6a057c7bee44902c843949de6bbd439e33092 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 24 Feb 2023 14:13:43 +0800 Subject: [PATCH 0053/2078] selftests/bpf: run mptcp in a dedicated netns The current mptcp test is run in init netns. If the user or default system config disabled mptcp, the test will fail. Let's run the mptcp test in a dedicated netns to avoid none kernel default mptcp setting. 
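For context, a minimal sketch of the pattern this series converts the tests to (illustrative only, not part of the patch; the namespace name is hypothetical): shell commands go through the shared SYS() helper, which jumps to a cleanup label on failure, while teardown uses SYS_NOFAIL() so cleanup itself can never abort the test:

	struct nstoken *nstoken = NULL;

	SYS(fail, "ip netns add example_ns");
	SYS(fail, "ip -net example_ns link set dev lo up");
	nstoken = open_netns("example_ns");
	if (!ASSERT_OK_PTR(nstoken, "open_netns"))
		goto fail;
	/* ... run the actual test inside the namespace ... */
fail:
	if (nstoken)
		close_netns(nstoken);
	SYS_NOFAIL("ip netns del example_ns");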
Suggested-by: Martin KaFai Lau Signed-off-by: Hangbin Liu Acked-by: Matthieu Baerts Link: https://lore.kernel.org/r/20230224061343.506571-3-liuhangbin@gmail.com Signed-off-by: Martin KaFai Lau --- .../testing/selftests/bpf/prog_tests/mptcp.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index 59f08d6d1d53e..cd0c42fff7c05 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -7,6 +7,8 @@ #include "network_helpers.h" #include "mptcp_sock.skel.h" +#define NS_TEST "mptcp_ns" + #ifndef TCP_CA_NAME_MAX #define TCP_CA_NAME_MAX 16 #endif @@ -138,12 +140,20 @@ out: static void test_base(void) { + struct nstoken *nstoken = NULL; int server_fd, cgroup_fd; cgroup_fd = test__join_cgroup("/mptcp"); if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup")) return; + SYS(fail, "ip netns add %s", NS_TEST); + SYS(fail, "ip -net %s link set dev lo up", NS_TEST); + + nstoken = open_netns(NS_TEST); + if (!ASSERT_OK_PTR(nstoken, "open_netns")) + goto fail; + /* without MPTCP */ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0); if (!ASSERT_GE(server_fd, 0, "start_server")) @@ -157,13 +167,18 @@ with_mptcp: /* with MPTCP */ server_fd = start_mptcp_server(AF_INET, NULL, 0, 0); if (!ASSERT_GE(server_fd, 0, "start_mptcp_server")) - goto close_cgroup_fd; + goto fail; ASSERT_OK(run_test(cgroup_fd, server_fd, true), "run_test mptcp"); close(server_fd); -close_cgroup_fd: +fail: + if (nstoken) + close_netns(nstoken); + + SYS_NOFAIL("ip netns del " NS_TEST " &> /dev/null"); + close(cgroup_fd); } -- GitLab From 0f485805d008aa56644f68179a7e6579fc1515e7 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Tue, 14 Feb 2023 17:00:33 +0900 Subject: [PATCH 0054/2078] wifi: brcmfmac: acpi: Add support for fetching Apple ACPI properties On DT platforms, the module-instance and antenna-sku-info properties are passed in the DT. On ACPI platforms, module-instance is passed via the analogous Apple device property mechanism, while the antenna SKU info is instead obtained via an ACPI method that grabs it from non-volatile storage. Add support for this, to allow proper firmware selection on Apple platforms. 
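As a rough illustration of what the lookup described above ends up producing (all values hypothetical, for illustration only):

	/* ACPI device property "module-instance" = "kauai" (hypothetical)      */
	settings->board_type  = "apple,kauai";  /* built via devm_kasprintf()   */
	/* ACPI method RWCV returns a buffer whose first two bytes carry the    */
	/* antenna SKU, e.g. "X3" (hypothetical)                                */
	settings->antenna_sku = "X3";

These strings then feed into firmware selection, mirroring what the DT path already provides on non-ACPI platforms.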
Signed-off-by: Hector Martin Reviewed-by: Julian Calaby Acked-by: Linus Walleij Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230214080034.3828-2-marcan@marcan.st --- .../broadcom/brcm80211/brcmfmac/Makefile | 2 + .../broadcom/brcm80211/brcmfmac/acpi.c | 51 +++++++++++++++++++ .../broadcom/brcm80211/brcmfmac/common.c | 1 + .../broadcom/brcm80211/brcmfmac/common.h | 9 ++++ 4 files changed, 63 insertions(+) create mode 100644 drivers/net/wireless/broadcom/brcm80211/brcmfmac/acpi.c diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/Makefile b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/Makefile index 0e996cf24f882..dc6d27a36faa9 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/Makefile +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/Makefile @@ -48,6 +48,8 @@ brcmfmac-$(CONFIG_OF) += \ of.o brcmfmac-$(CONFIG_DMI) += \ dmi.o +brcmfmac-$(CONFIG_ACPI) += \ + acpi.o ifeq ($(CONFIG_BRCMFMAC),m) obj-m += wcc/ diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/acpi.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/acpi.c new file mode 100644 index 0000000000000..c4a54861bfb40 --- /dev/null +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/acpi.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: ISC +/* + * Copyright The Asahi Linux Contributors + */ + +#include +#include "debug.h" +#include "core.h" +#include "common.h" + +void brcmf_acpi_probe(struct device *dev, enum brcmf_bus_type bus_type, + struct brcmf_mp_device *settings) +{ + acpi_status status; + const union acpi_object *o; + struct acpi_buffer buf = {ACPI_ALLOCATE_BUFFER, NULL}; + struct acpi_device *adev = ACPI_COMPANION(dev); + + if (!adev) + return; + + if (!ACPI_FAILURE(acpi_dev_get_property(adev, "module-instance", + ACPI_TYPE_STRING, &o))) { + brcmf_dbg(INFO, "ACPI module-instance=%s\n", o->string.pointer); + settings->board_type = devm_kasprintf(dev, GFP_KERNEL, + "apple,%s", + o->string.pointer); + } else { + brcmf_dbg(INFO, "No ACPI module-instance\n"); + return; + } + + status = acpi_evaluate_object(adev->handle, "RWCV", NULL, &buf); + o = buf.pointer; + if (!ACPI_FAILURE(status) && o && o->type == ACPI_TYPE_BUFFER && + o->buffer.length >= 2) { + char *antenna_sku = devm_kzalloc(dev, 3, GFP_KERNEL); + + if (antenna_sku) { + memcpy(antenna_sku, o->buffer.pointer, 2); + brcmf_dbg(INFO, "ACPI RWCV data=%*phN antenna-sku=%s\n", + (int)o->buffer.length, o->buffer.pointer, + antenna_sku); + settings->antenna_sku = antenna_sku; + } + + kfree(buf.pointer); + } else { + brcmf_dbg(INFO, "No ACPI antenna-sku\n"); + } +} diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c index f235beaddddba..4fea6f0ea8c88 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c @@ -487,6 +487,7 @@ struct brcmf_mp_device *brcmf_get_module_param(struct device *dev, /* No platform data for this device, try OF and DMI data */ brcmf_dmi_probe(settings, chip, chiprev); brcmf_of_probe(dev, bus_type, settings); + brcmf_acpi_probe(dev, bus_type, settings); } return settings; } diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.h index aa25abffcc7db..7167fd4f8c639 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.h @@ -77,6 +77,15 @@ static inline void brcmf_dmi_probe(struct brcmf_mp_device 
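The resulting layout at the top of device memory, as described above, looks roughly like this (sketch; only the ordering matters):

	/*  lower addresses                                          higher addresses
	 *  ... | random seed (256 bytes) | footer { length, magic } | NVRAM blob | ...
	 *                                  magic = BRCMF_RANDOM_SEED_MAGIC (0xfeedc0de)
	 */

Firmware that does not look for the magic footer simply ignores the extra bytes, which is why this can be done unconditionally on platforms with an Apple OTP.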
*settings, u32 chip, u32 chiprev) {} #endif +#ifdef CONFIG_ACPI +void brcmf_acpi_probe(struct device *dev, enum brcmf_bus_type bus_type, + struct brcmf_mp_device *settings); +#else +static inline void brcmf_acpi_probe(struct device *dev, + enum brcmf_bus_type bus_type, + struct brcmf_mp_device *settings) {} +#endif + u8 brcmf_map_prio_to_prec(void *cfg, u8 prio); u8 brcmf_map_prio_to_aci(void *cfg, u8 prio); -- GitLab From 91918ce88d9fef408bb12c46a27c73d79b604c20 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Tue, 14 Feb 2023 17:00:34 +0900 Subject: [PATCH 0055/2078] wifi: brcmfmac: pcie: Provide a buffer of random bytes to the device Newer Apple firmwares on chipsets without a hardware RNG require the host to provide a buffer of 256 random bytes to the device on initialization. This buffer is present immediately before NVRAM, suffixed by a footer containing a magic number and the buffer length. This won't affect chips/firmwares that do not use this feature, so do it unconditionally for all Apple platforms (those with an Apple OTP). Reviewed-by: Linus Walleij Signed-off-by: Hector Martin Reviewed-by: Julian Calaby Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230214080034.3828-3-marcan@marcan.st --- .../broadcom/brcm80211/brcmfmac/pcie.c | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c index a9b9b2dc62d4f..2835ef4edb18f 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -1653,6 +1654,13 @@ brcmf_pcie_init_share_ram_info(struct brcmf_pciedev_info *devinfo, return 0; } +struct brcmf_random_seed_footer { + __le32 length; + __le32 magic; +}; + +#define BRCMF_RANDOM_SEED_MAGIC 0xfeedc0de +#define BRCMF_RANDOM_SEED_LENGTH 0x100 static int brcmf_pcie_download_fw_nvram(struct brcmf_pciedev_info *devinfo, const struct firmware *fw, void *nvram, @@ -1689,6 +1697,30 @@ static int brcmf_pcie_download_fw_nvram(struct brcmf_pciedev_info *devinfo, nvram_len; memcpy_toio(devinfo->tcm + address, nvram, nvram_len); brcmf_fw_nvram_free(nvram); + + if (devinfo->otp.valid) { + size_t rand_len = BRCMF_RANDOM_SEED_LENGTH; + struct brcmf_random_seed_footer footer = { + .length = cpu_to_le32(rand_len), + .magic = cpu_to_le32(BRCMF_RANDOM_SEED_MAGIC), + }; + void *randbuf; + + /* Some Apple chips/firmwares expect a buffer of random + * data to be present before NVRAM + */ + brcmf_dbg(PCIE, "Download random seed\n"); + + address -= sizeof(footer); + memcpy_toio(devinfo->tcm + address, &footer, + sizeof(footer)); + + address -= rand_len; + randbuf = kzalloc(rand_len, GFP_KERNEL); + get_random_bytes(randbuf, rand_len); + memcpy_toio(devinfo->tcm + address, randbuf, rand_len); + kfree(randbuf); + } } else { brcmf_dbg(PCIE, "No matching NVRAM file found %s\n", devinfo->nvram_name); -- GitLab From ec52d77d077529f198fd874c550a26b9cc86a331 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Tue, 24 Jan 2023 10:42:48 +0000 Subject: [PATCH 0056/2078] wifi: brcmfmac: support CQM RSSI notification with older firmware Using the BCM4339 firmware from linux-firmware (version "BCM4339/2 wl0: Sep 5 2019 11:05:52 version 6.37.39.113 (r722271 CY)" from cypress/cyfmac4339-sdio.bin) the RSSI respose is only 4 bytes, which results in an error being logged. 
It seems that older devices send only the RSSI field and neither SNR nor noise is included. Handle this by accepting a 4 byte message and reading only the RSSI from it. Fixes: 7dd56ea45a66 ("brcmfmac: add support for CQM RSSI notifications") Signed-off-by: John Keeping Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230124104248.2917465-1-john@metanate.com --- .../broadcom/brcm80211/brcmfmac/cfg80211.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index a9690ec4c850c..d80d2b9b15c74 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -6489,18 +6489,20 @@ static s32 brcmf_notify_rssi(struct brcmf_if *ifp, { struct brcmf_cfg80211_vif *vif = ifp->vif; struct brcmf_rssi_be *info = data; - s32 rssi, snr, noise; + s32 rssi, snr = 0, noise = 0; s32 low, high, last; - if (e->datalen < sizeof(*info)) { + if (e->datalen >= sizeof(*info)) { + rssi = be32_to_cpu(info->rssi); + snr = be32_to_cpu(info->snr); + noise = be32_to_cpu(info->noise); + } else if (e->datalen >= sizeof(rssi)) { + rssi = be32_to_cpu(*(__be32 *)data); + } else { brcmf_err("insufficient RSSI event data\n"); return 0; } - rssi = be32_to_cpu(info->rssi); - snr = be32_to_cpu(info->snr); - noise = be32_to_cpu(info->noise); - low = vif->cqm_rssi_low; high = vif->cqm_rssi_high; last = vif->cqm_rssi_last; -- GitLab From 75c4a8154cb6c7239fb55d5550f481f6765fb83c Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Fri, 24 Feb 2023 12:28:05 +0200 Subject: [PATCH 0057/2078] wifi: ath6kl: reduce WARN to dev_dbg() in callback The warn is triggered on a known race condition, documented in the code above the test, that is correctly handled. Using WARN() hinders automated testing. Reducing severity. Fixes: de2070fc4aa7 ("ath6kl: Fix kernel panic on continuous driver load/unload") Reported-and-tested-by: syzbot+555908813b2ea35dae9a@syzkaller.appspotmail.com Signed-off-by: Oliver Neukum Signed-off-by: Fedor Pchelkin Signed-off-by: Alexey Khoroshilov Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230126182431.867984-1-pchelkin@ispras.ru --- drivers/net/wireless/ath/ath6kl/htc_pipe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath6kl/htc_pipe.c b/drivers/net/wireless/ath/ath6kl/htc_pipe.c index c68848819a52d..9b88d96bfe96c 100644 --- a/drivers/net/wireless/ath/ath6kl/htc_pipe.c +++ b/drivers/net/wireless/ath/ath6kl/htc_pipe.c @@ -960,8 +960,8 @@ static int ath6kl_htc_pipe_rx_complete(struct ath6kl *ar, struct sk_buff *skb, * Thus the possibility of ar->htc_target being NULL * via ath6kl_recv_complete -> ath6kl_usb_io_comp_work. */ - if (WARN_ON_ONCE(!target)) { - ath6kl_err("Target not yet initialized\n"); + if (!target) { + ath6kl_dbg(ATH6KL_DBG_HTC, "Target not yet initialized\n"); status = -EINVAL; goto free_skb; } -- GitLab From a96f10422e74cde27c100b321b127ec32ae75747 Mon Sep 17 00:00:00 2001 From: Muna Sinada Date: Fri, 24 Feb 2023 12:28:03 +0200 Subject: [PATCH 0058/2078] wifi: ath11k: modify accessor macros to match index size HE PHY is only 11 bytes, therefore it should be using byte indexes instead of dword. Change corresponding macros to reflect this. 
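A quick sanity check of the remapping (sketch, not part of the patch), assuming the usual little-endian packing of the 11 capability bytes into words: the SU beamformer bit, for example, moves from bit 31 of dword 0 to bit 7 of byte 3, which is the same physical bit:

	/* old: FIELD_GET(BIT(31), hecap_phy[HECAP_PHYDWORD_0]) -> dword 0, bit 31 */
	/* new: FIELD_GET(BIT(7),  hecap_phy[HE_PHYCAP_BYTE_3]) -> byte 3,  bit 7  */
	/* 31 == 3 * 8 + 7, so both accessors select the same capability bit.      */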
Signed-off-by: Muna Sinada Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/1666128501-12364-2-git-send-email-quic_msinada@quicinc.com --- drivers/net/wireless/ath/ath11k/wmi.h | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/wmi.h b/drivers/net/wireless/ath/ath11k/wmi.h index 2273d885f7fa7..9f50be411a8c4 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.h +++ b/drivers/net/wireless/ath/ath11k/wmi.h @@ -2859,30 +2859,32 @@ struct rx_reorder_queue_remove_params { #define WMI_VDEV_PARAM_TXBF_SU_TX_BFER BIT(2) #define WMI_VDEV_PARAM_TXBF_MU_TX_BFER BIT(3) -#define HECAP_PHYDWORD_0 0 -#define HECAP_PHYDWORD_1 1 -#define HECAP_PHYDWORD_2 2 +#define HE_PHYCAP_BYTE_0 0 +#define HE_PHYCAP_BYTE_1 1 +#define HE_PHYCAP_BYTE_2 2 +#define HE_PHYCAP_BYTE_3 3 +#define HE_PHYCAP_BYTE_4 4 -#define HECAP_PHY_SU_BFER BIT(31) +#define HECAP_PHY_SU_BFER BIT(7) #define HECAP_PHY_SU_BFEE BIT(0) #define HECAP_PHY_MU_BFER BIT(1) -#define HECAP_PHY_UL_MUMIMO BIT(22) -#define HECAP_PHY_UL_MUOFDMA BIT(23) +#define HECAP_PHY_UL_MUMIMO BIT(6) +#define HECAP_PHY_UL_MUOFDMA BIT(7) #define HECAP_PHY_SUBFMR_GET(hecap_phy) \ - FIELD_GET(HECAP_PHY_SU_BFER, hecap_phy[HECAP_PHYDWORD_0]) + FIELD_GET(HECAP_PHY_SU_BFER, hecap_phy[HE_PHYCAP_BYTE_3]) #define HECAP_PHY_SUBFME_GET(hecap_phy) \ - FIELD_GET(HECAP_PHY_SU_BFEE, hecap_phy[HECAP_PHYDWORD_1]) + FIELD_GET(HECAP_PHY_SU_BFEE, hecap_phy[HE_PHYCAP_BYTE_4]) #define HECAP_PHY_MUBFMR_GET(hecap_phy) \ - FIELD_GET(HECAP_PHY_MU_BFER, hecap_phy[HECAP_PHYDWORD_1]) + FIELD_GET(HECAP_PHY_MU_BFER, hecap_phy[HE_PHYCAP_BYTE_4]) #define HECAP_PHY_ULMUMIMO_GET(hecap_phy) \ - FIELD_GET(HECAP_PHY_UL_MUMIMO, hecap_phy[HECAP_PHYDWORD_0]) + FIELD_GET(HECAP_PHY_UL_MUMIMO, hecap_phy[HE_PHYCAP_BYTE_2]) #define HECAP_PHY_ULOFDMA_GET(hecap_phy) \ - FIELD_GET(HECAP_PHY_UL_MUOFDMA, hecap_phy[HECAP_PHYDWORD_0]) + FIELD_GET(HECAP_PHY_UL_MUOFDMA, hecap_phy[HE_PHYCAP_BYTE_2]) #define HE_MODE_SU_TX_BFEE BIT(0) #define HE_MODE_SU_TX_BFER BIT(1) -- GitLab From 38dfe775d0abf511341f37c1cb77b919a3ad410b Mon Sep 17 00:00:00 2001 From: Muna Sinada Date: Fri, 24 Feb 2023 12:28:04 +0200 Subject: [PATCH 0059/2078] wifi: ath11k: push MU-MIMO params from hostapd to hardware In the previous behaviour only HE IE in management frames are changed regarding MU-MIMO configurations and not in hardware. Adding push of MU-MIMO configurations to the hardware as well. 
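In rough terms (a simplified sketch that mirrors the helper added below, not a separate mechanism): the driver now composes a HE MU mode bitmask from the hostapd-provided bss_conf flags and hands it to firmware through WMI_VDEV_PARAM_SET_HEMU_MODE, instead of only reflecting those flags in the HE IE:

	u32 hemode = 0;

	if (vif->bss_conf.he_su_beamformer) {
		hemode |= FIELD_PREP(HE_MODE_SU_TX_BFER, HE_SU_BFER_ENABLE);
		if (vif->bss_conf.he_mu_beamformer && vdev_type == WMI_VDEV_TYPE_AP)
			hemode |= FIELD_PREP(HE_MODE_MU_TX_BFER, HE_MU_BFER_ENABLE);
	}
	/* ... OFDMA and UL MU-MIMO bits; exact gating is in ath11k_mac_set_he_txbf_conf() */
	ath11k_wmi_vdev_set_param_cmd(ar, vdev_id,
				      WMI_VDEV_PARAM_SET_HEMU_MODE, hemode);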
Tested-on: IPQ8074 hw2.0 AHB WLAN.HK.2.4.0.1-00356-QCAHKSWPL_SILICONZ-1 Co-developed-by: Anilkumar Kolli Signed-off-by: Anilkumar Kolli Signed-off-by: Muna Sinada Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/1666128501-12364-3-git-send-email-quic_msinada@quicinc.com --- drivers/net/wireless/ath/ath11k/mac.c | 200 ++++++++++++++++---------- drivers/net/wireless/ath/ath11k/wmi.h | 3 + 2 files changed, 130 insertions(+), 73 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 110a38cce0a71..7c4f424180dcd 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -2699,6 +2699,117 @@ static int ath11k_setup_peer_smps(struct ath11k *ar, struct ath11k_vif *arvif, ath11k_smps_map[smps]); } +static bool ath11k_mac_set_he_txbf_conf(struct ath11k_vif *arvif) +{ + struct ath11k *ar = arvif->ar; + u32 param, value; + int ret; + + if (!arvif->vif->bss_conf.he_support) + return true; + + param = WMI_VDEV_PARAM_SET_HEMU_MODE; + value = 0; + if (arvif->vif->bss_conf.he_su_beamformer) { + value |= FIELD_PREP(HE_MODE_SU_TX_BFER, HE_SU_BFER_ENABLE); + if (arvif->vif->bss_conf.he_mu_beamformer && + arvif->vdev_type == WMI_VDEV_TYPE_AP) + value |= FIELD_PREP(HE_MODE_MU_TX_BFER, HE_MU_BFER_ENABLE); + } + + if (arvif->vif->type != NL80211_IFTYPE_MESH_POINT) { + value |= FIELD_PREP(HE_MODE_DL_OFDMA, HE_DL_MUOFDMA_ENABLE) | + FIELD_PREP(HE_MODE_UL_OFDMA, HE_UL_MUOFDMA_ENABLE); + + if (arvif->vif->bss_conf.he_full_ul_mumimo) + value |= FIELD_PREP(HE_MODE_UL_MUMIMO, HE_UL_MUMIMO_ENABLE); + + if (arvif->vif->bss_conf.he_su_beamformee) + value |= FIELD_PREP(HE_MODE_SU_TX_BFEE, HE_SU_BFEE_ENABLE); + } + + ret = ath11k_wmi_vdev_set_param_cmd(ar, arvif->vdev_id, param, value); + if (ret) { + ath11k_warn(ar->ab, "failed to set vdev %d HE MU mode: %d\n", + arvif->vdev_id, ret); + return false; + } + + param = WMI_VDEV_PARAM_SET_HE_SOUNDING_MODE; + value = FIELD_PREP(HE_VHT_SOUNDING_MODE, HE_VHT_SOUNDING_MODE_ENABLE) | + FIELD_PREP(HE_TRIG_NONTRIG_SOUNDING_MODE, + HE_TRIG_NONTRIG_SOUNDING_MODE_ENABLE); + ret = ath11k_wmi_vdev_set_param_cmd(ar, arvif->vdev_id, + param, value); + if (ret) { + ath11k_warn(ar->ab, "failed to set vdev %d sounding mode: %d\n", + arvif->vdev_id, ret); + return false; + } + return true; +} + +static bool ath11k_mac_vif_recalc_sta_he_txbf(struct ath11k *ar, + struct ieee80211_vif *vif, + struct ieee80211_sta_he_cap *he_cap) +{ + struct ath11k_vif *arvif = (void *)vif->drv_priv; + struct ieee80211_he_cap_elem he_cap_elem = {0}; + struct ieee80211_sta_he_cap *cap_band = NULL; + struct cfg80211_chan_def def; + u32 param = WMI_VDEV_PARAM_SET_HEMU_MODE; + u32 hemode = 0; + int ret; + + if (!vif->bss_conf.he_support) + return true; + + if (vif->type != NL80211_IFTYPE_STATION) + return false; + + if (WARN_ON(ath11k_mac_vif_chan(vif, &def))) + return false; + + if (def.chan->band == NL80211_BAND_2GHZ) + cap_band = &ar->mac.iftype[NL80211_BAND_2GHZ][vif->type].he_cap; + else + cap_band = &ar->mac.iftype[NL80211_BAND_5GHZ][vif->type].he_cap; + + memcpy(&he_cap_elem, &cap_band->he_cap_elem, sizeof(he_cap_elem)); + + if (HECAP_PHY_SUBFME_GET(he_cap_elem.phy_cap_info)) { + if (HECAP_PHY_SUBFMR_GET(he_cap->he_cap_elem.phy_cap_info)) + hemode |= FIELD_PREP(HE_MODE_SU_TX_BFEE, HE_SU_BFEE_ENABLE); + if (HECAP_PHY_MUBFMR_GET(he_cap->he_cap_elem.phy_cap_info)) + hemode |= FIELD_PREP(HE_MODE_MU_TX_BFEE, HE_MU_BFEE_ENABLE); + } + + if (vif->type != NL80211_IFTYPE_MESH_POINT) { + hemode |= 
FIELD_PREP(HE_MODE_DL_OFDMA, HE_DL_MUOFDMA_ENABLE) | + FIELD_PREP(HE_MODE_UL_OFDMA, HE_UL_MUOFDMA_ENABLE); + + if (HECAP_PHY_ULMUMIMO_GET(he_cap_elem.phy_cap_info)) + if (HECAP_PHY_ULMUMIMO_GET(he_cap->he_cap_elem.phy_cap_info)) + hemode |= FIELD_PREP(HE_MODE_UL_MUMIMO, + HE_UL_MUMIMO_ENABLE); + + if (FIELD_GET(HE_MODE_MU_TX_BFEE, hemode)) + hemode |= FIELD_PREP(HE_MODE_SU_TX_BFEE, HE_SU_BFEE_ENABLE); + + if (FIELD_GET(HE_MODE_MU_TX_BFER, hemode)) + hemode |= FIELD_PREP(HE_MODE_SU_TX_BFER, HE_SU_BFER_ENABLE); + } + + ret = ath11k_wmi_vdev_set_param_cmd(ar, arvif->vdev_id, param, hemode); + if (ret) { + ath11k_warn(ar->ab, "failed to submit vdev param txbf 0x%x: %d\n", + hemode, ret); + return false; + } + + return true; +} + static void ath11k_bss_assoc(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_bss_conf *bss_conf) @@ -2709,6 +2820,7 @@ static void ath11k_bss_assoc(struct ieee80211_hw *hw, struct ieee80211_sta *ap_sta; struct ath11k_peer *peer; bool is_auth = false; + struct ieee80211_sta_he_cap he_cap; int ret; lockdep_assert_held(&ar->conf_mutex); @@ -2726,6 +2838,9 @@ static void ath11k_bss_assoc(struct ieee80211_hw *hw, return; } + /* he_cap here is updated at assoc success for sta mode only */ + he_cap = ap_sta->deflink.he_cap; + ath11k_peer_assoc_prepare(ar, vif, ap_sta, &peer_arg, false); rcu_read_unlock(); @@ -2753,6 +2868,12 @@ static void ath11k_bss_assoc(struct ieee80211_hw *hw, return; } + if (!ath11k_mac_vif_recalc_sta_he_txbf(ar, vif, &he_cap)) { + ath11k_warn(ar->ab, "failed to recalc he txbf for vdev %i on bss %pM\n", + arvif->vdev_id, bss_conf->bssid); + return; + } + WARN_ON(arvif->is_up); arvif->aid = vif->cfg.aid; @@ -3202,6 +3323,8 @@ static void ath11k_mac_op_bss_info_changed(struct ieee80211_hw *hw, ether_addr_copy(arvif->bssid, info->bssid); if (changed & BSS_CHANGED_BEACON_ENABLED) { + if (info->enable_beacon) + ath11k_mac_set_he_txbf_conf(arvif); ath11k_control_beaconing(arvif, info); if (arvif->is_up && vif->bss_conf.he_support && @@ -5392,6 +5515,10 @@ static int ath11k_mac_copy_he_cap(struct ath11k *ar, he_cap_elem->mac_cap_info[1] &= IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_MASK; + he_cap_elem->phy_cap_info[0] &= + ~IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G; + he_cap_elem->phy_cap_info[0] &= + ~IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G; he_cap_elem->phy_cap_info[5] &= ~IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_MASK; @@ -6026,69 +6153,6 @@ ath11k_mac_setup_vdev_create_params(struct ath11k_vif *arvif, } } -static u32 -ath11k_mac_prepare_he_mode(struct ath11k_pdev *pdev, u32 viftype) -{ - struct ath11k_pdev_cap *pdev_cap = &pdev->cap; - struct ath11k_band_cap *cap_band = NULL; - u32 *hecap_phy_ptr = NULL; - u32 hemode = 0; - - if (pdev->cap.supported_bands & WMI_HOST_WLAN_2G_CAP) - cap_band = &pdev_cap->band[NL80211_BAND_2GHZ]; - else - cap_band = &pdev_cap->band[NL80211_BAND_5GHZ]; - - hecap_phy_ptr = &cap_band->he_cap_phy_info[0]; - - hemode = FIELD_PREP(HE_MODE_SU_TX_BFEE, HE_SU_BFEE_ENABLE) | - FIELD_PREP(HE_MODE_SU_TX_BFER, HECAP_PHY_SUBFMR_GET(hecap_phy_ptr)) | - FIELD_PREP(HE_MODE_UL_MUMIMO, HECAP_PHY_ULMUMIMO_GET(hecap_phy_ptr)); - - /* TODO WDS and other modes */ - if (viftype == NL80211_IFTYPE_AP) { - hemode |= FIELD_PREP(HE_MODE_MU_TX_BFER, - HECAP_PHY_MUBFMR_GET(hecap_phy_ptr)) | - FIELD_PREP(HE_MODE_DL_OFDMA, HE_DL_MUOFDMA_ENABLE) | - FIELD_PREP(HE_MODE_UL_OFDMA, HE_UL_MUOFDMA_ENABLE); - } else { - hemode |= FIELD_PREP(HE_MODE_MU_TX_BFEE, HE_MU_BFEE_ENABLE); - } - - return hemode; -} - 
-static int ath11k_set_he_mu_sounding_mode(struct ath11k *ar, - struct ath11k_vif *arvif) -{ - u32 param_id, param_value; - struct ath11k_base *ab = ar->ab; - int ret = 0; - - param_id = WMI_VDEV_PARAM_SET_HEMU_MODE; - param_value = ath11k_mac_prepare_he_mode(ar->pdev, arvif->vif->type); - ret = ath11k_wmi_vdev_set_param_cmd(ar, arvif->vdev_id, - param_id, param_value); - if (ret) { - ath11k_warn(ab, "failed to set vdev %d HE MU mode: %d param_value %x\n", - arvif->vdev_id, ret, param_value); - return ret; - } - param_id = WMI_VDEV_PARAM_SET_HE_SOUNDING_MODE; - param_value = - FIELD_PREP(HE_VHT_SOUNDING_MODE, HE_VHT_SOUNDING_MODE_ENABLE) | - FIELD_PREP(HE_TRIG_NONTRIG_SOUNDING_MODE, - HE_TRIG_NONTRIG_SOUNDING_MODE_ENABLE); - ret = ath11k_wmi_vdev_set_param_cmd(ar, arvif->vdev_id, - param_id, param_value); - if (ret) { - ath11k_warn(ab, "failed to set vdev %d HE MU mode: %d\n", - arvif->vdev_id, ret); - return ret; - } - return ret; -} - static void ath11k_mac_op_update_vif_offload(struct ieee80211_hw *hw, struct ieee80211_vif *vif) { @@ -6757,7 +6821,6 @@ ath11k_mac_vdev_start_restart(struct ath11k_vif *arvif, struct ath11k_base *ab = ar->ab; struct wmi_vdev_start_req_arg arg = {}; const struct cfg80211_chan_def *chandef = &ctx->def; - int he_support = arvif->vif->bss_conf.he_support; int ret = 0; lockdep_assert_held(&ar->conf_mutex); @@ -6798,15 +6861,6 @@ ath11k_mac_vdev_start_restart(struct ath11k_vif *arvif, spin_lock_bh(&ab->base_lock); arg.regdomain = ar->ab->dfs_region; spin_unlock_bh(&ab->base_lock); - - if (he_support) { - ret = ath11k_set_he_mu_sounding_mode(ar, arvif); - if (ret) { - ath11k_warn(ar->ab, "failed to set he mode vdev %i\n", - arg.vdev_id); - return ret; - } - } } arg.channel.passive |= !!(chandef->chan->flags & IEEE80211_CHAN_NO_IR); diff --git a/drivers/net/wireless/ath/ath11k/wmi.h b/drivers/net/wireless/ath/ath11k/wmi.h index 9f50be411a8c4..d3b74f1346f58 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.h +++ b/drivers/net/wireless/ath/ath11k/wmi.h @@ -2897,8 +2897,11 @@ struct rx_reorder_queue_remove_params { #define HE_DL_MUOFDMA_ENABLE 1 #define HE_UL_MUOFDMA_ENABLE 1 #define HE_DL_MUMIMO_ENABLE 1 +#define HE_UL_MUMIMO_ENABLE 1 #define HE_MU_BFEE_ENABLE 1 #define HE_SU_BFEE_ENABLE 1 +#define HE_MU_BFER_ENABLE 1 +#define HE_SU_BFER_ENABLE 1 #define HE_VHT_SOUNDING_MODE_ENABLE 1 #define HE_SU_MU_SOUNDING_MODE_ENABLE 1 -- GitLab From 8077c1bbbc28e527fb29143c46f32c6a9d6cadf0 Mon Sep 17 00:00:00 2001 From: Muna Sinada Date: Fri, 24 Feb 2023 12:28:04 +0200 Subject: [PATCH 0060/2078] wifi: ath11k: move HE MCS mapper to a separate function Move HE MCS mapper to a separate function and call new function in ath11k_mac_copy_he_cap(). 
Tested-on: IPQ8074 hw2.0 AHB WLAN.HK.2.4.0.1-00356-QCAHKSWPL_SILICONZ-1 Signed-off-by: Muna Sinada Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/1666128501-12364-4-git-send-email-quic_msinada@quicinc.com --- drivers/net/wireless/ath/ath11k/mac.c | 34 +++++++++++++++++---------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 7c4f424180dcd..0835ab8b389a6 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -5483,6 +5483,27 @@ static __le16 ath11k_mac_setup_he_6ghz_cap(struct ath11k_pdev_cap *pcap, return cpu_to_le16(bcap->he_6ghz_capa); } +static void ath11k_mac_set_hemcsmap(struct ath11k *ar, + struct ath11k_pdev_cap *cap, + struct ieee80211_sta_he_cap *he_cap, + int band) +{ + struct ath11k_band_cap *band_cap = &cap->band[band]; + + he_cap->he_mcs_nss_supp.rx_mcs_80 = + cpu_to_le16(band_cap->he_mcs & 0xffff); + he_cap->he_mcs_nss_supp.tx_mcs_80 = + cpu_to_le16(band_cap->he_mcs & 0xffff); + he_cap->he_mcs_nss_supp.rx_mcs_160 = + cpu_to_le16((band_cap->he_mcs >> 16) & 0xffff); + he_cap->he_mcs_nss_supp.tx_mcs_160 = + cpu_to_le16((band_cap->he_mcs >> 16) & 0xffff); + he_cap->he_mcs_nss_supp.rx_mcs_80p80 = + cpu_to_le16((band_cap->he_mcs >> 16) & 0xffff); + he_cap->he_mcs_nss_supp.tx_mcs_80p80 = + cpu_to_le16((band_cap->he_mcs >> 16) & 0xffff); +} + static int ath11k_mac_copy_he_cap(struct ath11k *ar, struct ath11k_pdev_cap *cap, struct ieee80211_sband_iftype_data *data, @@ -5544,18 +5565,7 @@ static int ath11k_mac_copy_he_cap(struct ath11k *ar, break; } - he_cap->he_mcs_nss_supp.rx_mcs_80 = - cpu_to_le16(band_cap->he_mcs & 0xffff); - he_cap->he_mcs_nss_supp.tx_mcs_80 = - cpu_to_le16(band_cap->he_mcs & 0xffff); - he_cap->he_mcs_nss_supp.rx_mcs_160 = - cpu_to_le16((band_cap->he_mcs >> 16) & 0xffff); - he_cap->he_mcs_nss_supp.tx_mcs_160 = - cpu_to_le16((band_cap->he_mcs >> 16) & 0xffff); - he_cap->he_mcs_nss_supp.rx_mcs_80p80 = - cpu_to_le16((band_cap->he_mcs >> 16) & 0xffff); - he_cap->he_mcs_nss_supp.tx_mcs_80p80 = - cpu_to_le16((band_cap->he_mcs >> 16) & 0xffff); + ath11k_mac_set_hemcsmap(ar, cap, he_cap, band); memset(he_cap->ppe_thres, 0, sizeof(he_cap->ppe_thres)); if (he_cap_elem->phy_cap_info[6] & -- GitLab From ebf82988f844dd98e6b007cffcc5e95986056995 Mon Sep 17 00:00:00 2001 From: Muna Sinada Date: Fri, 24 Feb 2023 12:28:04 +0200 Subject: [PATCH 0061/2078] wifi: ath11k: generate rx and tx mcs maps for supported HE mcs Generate rx and tx mcs maps in ath11k_mac_set_hemcsmap() and set them in supported mcs/nss for HE capabilities. 
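A quick worked example of the map this generates (sketch): with two enabled chains, streams 0-1 advertise MCS 0-11 and streams 2-7 are marked not supported, two bits per stream:

	/* IEEE80211_HE_MCS_SUPPORT_0_11 = 2, IEEE80211_HE_MCS_NOT_SUPPORTED = 3 */
	/* map = (2 << 0) | (2 << 2) | (3 << 4) | (3 << 6) | ... | (3 << 14)     */
	/*     = 0xfffa                                                          */

The same 16-bit map is then used for the 80, 160 and 80+80 MHz rx/tx fields, replacing the per-band he_mcs value reported by firmware.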
Tested-on: IPQ8074 hw2.0 AHB WLAN.HK.2.4.0.1-00356-QCAHKSWPL_SILICONZ-1 Signed-off-by: Muna Sinada Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/1666128501-12364-5-git-send-email-quic_msinada@quicinc.com --- drivers/net/wireless/ath/ath11k/mac.c | 30 ++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 0835ab8b389a6..fd1a23ead5e22 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -5488,20 +5488,36 @@ static void ath11k_mac_set_hemcsmap(struct ath11k *ar, struct ieee80211_sta_he_cap *he_cap, int band) { - struct ath11k_band_cap *band_cap = &cap->band[band]; + u16 txmcs_map, rxmcs_map; + u32 i; + rxmcs_map = 0; + txmcs_map = 0; + for (i = 0; i < 8; i++) { + if (i < ar->num_tx_chains && + (ar->cfg_tx_chainmask >> cap->tx_chain_mask_shift) & BIT(i)) + txmcs_map |= IEEE80211_HE_MCS_SUPPORT_0_11 << (i * 2); + else + txmcs_map |= IEEE80211_HE_MCS_NOT_SUPPORTED << (i * 2); + + if (i < ar->num_rx_chains && + (ar->cfg_rx_chainmask >> cap->tx_chain_mask_shift) & BIT(i)) + rxmcs_map |= IEEE80211_HE_MCS_SUPPORT_0_11 << (i * 2); + else + rxmcs_map |= IEEE80211_HE_MCS_NOT_SUPPORTED << (i * 2); + } he_cap->he_mcs_nss_supp.rx_mcs_80 = - cpu_to_le16(band_cap->he_mcs & 0xffff); + cpu_to_le16(rxmcs_map & 0xffff); he_cap->he_mcs_nss_supp.tx_mcs_80 = - cpu_to_le16(band_cap->he_mcs & 0xffff); + cpu_to_le16(txmcs_map & 0xffff); he_cap->he_mcs_nss_supp.rx_mcs_160 = - cpu_to_le16((band_cap->he_mcs >> 16) & 0xffff); + cpu_to_le16(rxmcs_map & 0xffff); he_cap->he_mcs_nss_supp.tx_mcs_160 = - cpu_to_le16((band_cap->he_mcs >> 16) & 0xffff); + cpu_to_le16(txmcs_map & 0xffff); he_cap->he_mcs_nss_supp.rx_mcs_80p80 = - cpu_to_le16((band_cap->he_mcs >> 16) & 0xffff); + cpu_to_le16(rxmcs_map & 0xffff); he_cap->he_mcs_nss_supp.tx_mcs_80p80 = - cpu_to_le16((band_cap->he_mcs >> 16) & 0xffff); + cpu_to_le16(txmcs_map & 0xffff); } static int ath11k_mac_copy_he_cap(struct ath11k *ar, -- GitLab From 3c7c07ca7ab144ef25402858078b27806322b752 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Tue, 14 Feb 2023 18:24:14 +0900 Subject: [PATCH 0062/2078] wifi: brcmfmac: chip: Only disable D11 cores; handle an arbitrary number At least on BCM4387, the D11 cores are held in reset on cold startup and firmware expects to release reset itself. Just assert reset here and let firmware deassert it. Premature deassertion results in the firmware failing to initialize properly some of the time, with strange AXI bus errors. Also, BCM4387 has 3 cores, up from 2. The logic for handling that is in brcmf_chip_ai_resetcore(), but since we aren't using that any more, just handle it here. 
Reviewed-by: Linus Walleij Reviewed-by: Arend van Spriel Signed-off-by: Hector Martin Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230214092423.15175-1-marcan@marcan.st --- .../net/wireless/broadcom/brcm80211/brcmfmac/chip.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c index 8073f31be27d9..a6239051404bf 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c @@ -1292,15 +1292,18 @@ static bool brcmf_chip_cm3_set_active(struct brcmf_chip_priv *chip) static inline void brcmf_chip_cr4_set_passive(struct brcmf_chip_priv *chip) { + int i; struct brcmf_core *core; brcmf_chip_disable_arm(chip, BCMA_CORE_ARM_CR4); - core = brcmf_chip_get_core(&chip->pub, BCMA_CORE_80211); - brcmf_chip_resetcore(core, D11_BCMA_IOCTL_PHYRESET | - D11_BCMA_IOCTL_PHYCLOCKEN, - D11_BCMA_IOCTL_PHYCLOCKEN, - D11_BCMA_IOCTL_PHYCLOCKEN); + /* Disable the cores only and let the firmware enable them. + * Releasing reset ourselves breaks BCM4387 in weird ways. + */ + for (i = 0; (core = brcmf_chip_get_d11core(&chip->pub, i)); i++) + brcmf_chip_coredisable(core, D11_BCMA_IOCTL_PHYRESET | + D11_BCMA_IOCTL_PHYCLOCKEN, + D11_BCMA_IOCTL_PHYCLOCKEN); } static bool brcmf_chip_cr4_set_active(struct brcmf_chip_priv *chip, u32 rstvec) -- GitLab From 098e0b105ce1047ad9984dc79287573e313b1232 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Tue, 14 Feb 2023 18:24:15 +0900 Subject: [PATCH 0063/2078] wifi: brcmfmac: chip: Handle 1024-unit sizes for TCM blocks BCM4387 has trailing odd-sized blocks as part of TCM which have their size described as a multiple of 1024 instead of 8192. Handle this so we can compute the TCM size properly. 
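A short worked example of the changed size computation (sketch, register value hypothetical):

	/* bxinfo = 0x205: ARMCR4_BLK_1K_MASK (0x200) is set, BSZ field = 5       */
	/* blksize = 8192 >> 3 = 1024                                             */
	/* bank contributes (5 + 1) * 1024 = 6 KiB                                */
	/* without the 1K flag the same BSZ value would mean (5 + 1) * 8192 = 48 KiB */

ARMCR4_BSZ_MASK is also widened from 0x3f to 0x7f so that larger bank-size values are not truncated.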
Reviewed-by: Linus Walleij Reviewed-by: Arend van Spriel Signed-off-by: Hector Martin Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230214092423.15175-2-marcan@marcan.st --- .../net/wireless/broadcom/brcm80211/brcmfmac/chip.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c index a6239051404bf..50e0c015cf43a 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c @@ -212,8 +212,9 @@ struct sbsocramregs { #define ARMCR4_TCBANB_MASK 0xf #define ARMCR4_TCBANB_SHIFT 0 -#define ARMCR4_BSZ_MASK 0x3f +#define ARMCR4_BSZ_MASK 0x7f #define ARMCR4_BSZ_MULT 8192 +#define ARMCR4_BLK_1K_MASK 0x200 struct brcmf_core_priv { struct brcmf_core pub; @@ -684,6 +685,7 @@ static u32 brcmf_chip_tcm_ramsize(struct brcmf_core_priv *cr4) u32 nbb; u32 totb; u32 bxinfo; + u32 blksize; u32 idx; corecap = brcmf_chip_core_read32(cr4, ARMCR4_CAP); @@ -695,7 +697,11 @@ static u32 brcmf_chip_tcm_ramsize(struct brcmf_core_priv *cr4) for (idx = 0; idx < totb; idx++) { brcmf_chip_core_write32(cr4, ARMCR4_BANKIDX, idx); bxinfo = brcmf_chip_core_read32(cr4, ARMCR4_BANKINFO); - memsize += ((bxinfo & ARMCR4_BSZ_MASK) + 1) * ARMCR4_BSZ_MULT; + blksize = ARMCR4_BSZ_MULT; + if (bxinfo & ARMCR4_BLK_1K_MASK) + blksize >>= 3; + + memsize += ((bxinfo & ARMCR4_BSZ_MASK) + 1) * blksize; } return memsize; -- GitLab From 398ce273d6b16a57dee99e4054a2be37f0a958ed Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Tue, 14 Feb 2023 18:24:16 +0900 Subject: [PATCH 0064/2078] wifi: brcmfmac: cfg80211: Add support for scan params v2 This new API version is required for at least the BCM4387 firmware. Add support for it, with a fallback to the v1 API. 
Acked-by: Linus Walleij Reviewed-by: Arend van Spriel Signed-off-by: Hector Martin Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230214092423.15175-3-marcan@marcan.st --- .../broadcom/brcm80211/brcmfmac/cfg80211.c | 245 +++++++++++------- .../broadcom/brcm80211/brcmfmac/feature.c | 1 + .../broadcom/brcm80211/brcmfmac/feature.h | 4 +- .../broadcom/brcm80211/brcmfmac/fwil_types.h | 49 +++- 4 files changed, 209 insertions(+), 90 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index d80d2b9b15c74..77706e204a8f3 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -1039,12 +1039,134 @@ void brcmf_set_mpc(struct brcmf_if *ifp, int mpc) } } +static void brcmf_scan_params_v2_to_v1(struct brcmf_scan_params_v2_le *params_v2_le, + struct brcmf_scan_params_le *params_le) +{ + size_t params_size; + u32 ch; + int n_channels, n_ssids; + + memcpy(¶ms_le->ssid_le, ¶ms_v2_le->ssid_le, + sizeof(params_le->ssid_le)); + memcpy(¶ms_le->bssid, ¶ms_v2_le->bssid, + sizeof(params_le->bssid)); + + params_le->bss_type = params_v2_le->bss_type; + params_le->scan_type = le32_to_cpu(params_v2_le->scan_type); + params_le->nprobes = params_v2_le->nprobes; + params_le->active_time = params_v2_le->active_time; + params_le->passive_time = params_v2_le->passive_time; + params_le->home_time = params_v2_le->home_time; + params_le->channel_num = params_v2_le->channel_num; + + ch = le32_to_cpu(params_v2_le->channel_num); + n_channels = ch & BRCMF_SCAN_PARAMS_COUNT_MASK; + n_ssids = ch >> BRCMF_SCAN_PARAMS_NSSID_SHIFT; + + params_size = sizeof(u16) * n_channels; + if (n_ssids > 0) { + params_size = roundup(params_size, sizeof(u32)); + params_size += sizeof(struct brcmf_ssid_le) * n_ssids; + } + + memcpy(¶ms_le->channel_list[0], + ¶ms_v2_le->channel_list[0], params_size); +} + +static void brcmf_escan_prep(struct brcmf_cfg80211_info *cfg, + struct brcmf_scan_params_v2_le *params_le, + struct cfg80211_scan_request *request) +{ + u32 n_ssids; + u32 n_channels; + s32 i; + s32 offset; + u16 chanspec; + char *ptr; + int length; + struct brcmf_ssid_le ssid_le; + + eth_broadcast_addr(params_le->bssid); + + length = BRCMF_SCAN_PARAMS_V2_FIXED_SIZE; + + params_le->version = cpu_to_le16(BRCMF_SCAN_PARAMS_VERSION_V2); + params_le->bss_type = DOT11_BSSTYPE_ANY; + params_le->scan_type = cpu_to_le32(BRCMF_SCANTYPE_ACTIVE); + params_le->channel_num = 0; + params_le->nprobes = cpu_to_le32(-1); + params_le->active_time = cpu_to_le32(-1); + params_le->passive_time = cpu_to_le32(-1); + params_le->home_time = cpu_to_le32(-1); + memset(¶ms_le->ssid_le, 0, sizeof(params_le->ssid_le)); + + /* Scan abort */ + if (!request) { + length += sizeof(u16); + params_le->channel_num = cpu_to_le32(1); + params_le->channel_list[0] = cpu_to_le16(-1); + params_le->length = cpu_to_le16(length); + return; + } + + n_ssids = request->n_ssids; + n_channels = request->n_channels; + + /* Copy channel array if applicable */ + brcmf_dbg(SCAN, "### List of channelspecs to scan ### %d\n", + n_channels); + if (n_channels > 0) { + length += roundup(sizeof(u16) * n_channels, sizeof(u32)); + for (i = 0; i < n_channels; i++) { + chanspec = channel_to_chanspec(&cfg->d11inf, + request->channels[i]); + brcmf_dbg(SCAN, "Chan : %d, Channel spec: %x\n", + request->channels[i]->hw_value, chanspec); + params_le->channel_list[i] = cpu_to_le16(chanspec); + } + } else { + brcmf_dbg(SCAN, 
"Scanning all channels\n"); + } + + /* Copy ssid array if applicable */ + brcmf_dbg(SCAN, "### List of SSIDs to scan ### %d\n", n_ssids); + if (n_ssids > 0) { + offset = offsetof(struct brcmf_scan_params_v2_le, channel_list) + + n_channels * sizeof(u16); + offset = roundup(offset, sizeof(u32)); + length += sizeof(ssid_le) * n_ssids, + ptr = (char *)params_le + offset; + for (i = 0; i < n_ssids; i++) { + memset(&ssid_le, 0, sizeof(ssid_le)); + ssid_le.SSID_len = + cpu_to_le32(request->ssids[i].ssid_len); + memcpy(ssid_le.SSID, request->ssids[i].ssid, + request->ssids[i].ssid_len); + if (!ssid_le.SSID_len) + brcmf_dbg(SCAN, "%d: Broadcast scan\n", i); + else + brcmf_dbg(SCAN, "%d: scan for %.32s size=%d\n", + i, ssid_le.SSID, ssid_le.SSID_len); + memcpy(ptr, &ssid_le, sizeof(ssid_le)); + ptr += sizeof(ssid_le); + } + } else { + brcmf_dbg(SCAN, "Performing passive scan\n"); + params_le->scan_type = cpu_to_le32(BRCMF_SCANTYPE_PASSIVE); + } + params_le->length = cpu_to_le16(length); + /* Adding mask to channel numbers */ + params_le->channel_num = + cpu_to_le32((n_ssids << BRCMF_SCAN_PARAMS_NSSID_SHIFT) | + (n_channels & BRCMF_SCAN_PARAMS_COUNT_MASK)); +} + s32 brcmf_notify_escan_complete(struct brcmf_cfg80211_info *cfg, struct brcmf_if *ifp, bool aborted, bool fw_abort) { struct brcmf_pub *drvr = cfg->pub; - struct brcmf_scan_params_le params_le; + struct brcmf_scan_params_v2_le params_v2_le; struct cfg80211_scan_request *scan_request; u64 reqid; u32 bucket; @@ -1063,20 +1185,23 @@ s32 brcmf_notify_escan_complete(struct brcmf_cfg80211_info *cfg, if (fw_abort) { /* Do a scan abort to stop the driver's scan engine */ brcmf_dbg(SCAN, "ABORT scan in firmware\n"); - memset(¶ms_le, 0, sizeof(params_le)); - eth_broadcast_addr(params_le.bssid); - params_le.bss_type = DOT11_BSSTYPE_ANY; - params_le.scan_type = 0; - params_le.channel_num = cpu_to_le32(1); - params_le.nprobes = cpu_to_le32(1); - params_le.active_time = cpu_to_le32(-1); - params_le.passive_time = cpu_to_le32(-1); - params_le.home_time = cpu_to_le32(-1); - /* Scan is aborted by setting channel_list[0] to -1 */ - params_le.channel_list[0] = cpu_to_le16(-1); + + brcmf_escan_prep(cfg, ¶ms_v2_le, NULL); + /* E-Scan (or anyother type) can be aborted by SCAN */ - err = brcmf_fil_cmd_data_set(ifp, BRCMF_C_SCAN, - ¶ms_le, sizeof(params_le)); + if (brcmf_feat_is_enabled(ifp, BRCMF_FEAT_SCAN_V2)) { + err = brcmf_fil_cmd_data_set(ifp, BRCMF_C_SCAN, + ¶ms_v2_le, + sizeof(params_v2_le)); + } else { + struct brcmf_scan_params_le params_le; + + brcmf_scan_params_v2_to_v1(¶ms_v2_le, ¶ms_le); + err = brcmf_fil_cmd_data_set(ifp, BRCMF_C_SCAN, + ¶ms_le, + sizeof(params_le)); + } + if (err) bphy_err(drvr, "Scan abort failed\n"); } @@ -1295,83 +1420,13 @@ done: return err; } -static void brcmf_escan_prep(struct brcmf_cfg80211_info *cfg, - struct brcmf_scan_params_le *params_le, - struct cfg80211_scan_request *request) -{ - u32 n_ssids; - u32 n_channels; - s32 i; - s32 offset; - u16 chanspec; - char *ptr; - struct brcmf_ssid_le ssid_le; - - eth_broadcast_addr(params_le->bssid); - params_le->bss_type = DOT11_BSSTYPE_ANY; - params_le->scan_type = BRCMF_SCANTYPE_ACTIVE; - params_le->channel_num = 0; - params_le->nprobes = cpu_to_le32(-1); - params_le->active_time = cpu_to_le32(-1); - params_le->passive_time = cpu_to_le32(-1); - params_le->home_time = cpu_to_le32(-1); - memset(¶ms_le->ssid_le, 0, sizeof(params_le->ssid_le)); - - n_ssids = request->n_ssids; - n_channels = request->n_channels; - - /* Copy channel array if applicable */ - brcmf_dbg(SCAN, "### List of 
channelspecs to scan ### %d\n", - n_channels); - if (n_channels > 0) { - for (i = 0; i < n_channels; i++) { - chanspec = channel_to_chanspec(&cfg->d11inf, - request->channels[i]); - brcmf_dbg(SCAN, "Chan : %d, Channel spec: %x\n", - request->channels[i]->hw_value, chanspec); - params_le->channel_list[i] = cpu_to_le16(chanspec); - } - } else { - brcmf_dbg(SCAN, "Scanning all channels\n"); - } - /* Copy ssid array if applicable */ - brcmf_dbg(SCAN, "### List of SSIDs to scan ### %d\n", n_ssids); - if (n_ssids > 0) { - offset = offsetof(struct brcmf_scan_params_le, channel_list) + - n_channels * sizeof(u16); - offset = roundup(offset, sizeof(u32)); - ptr = (char *)params_le + offset; - for (i = 0; i < n_ssids; i++) { - memset(&ssid_le, 0, sizeof(ssid_le)); - ssid_le.SSID_len = - cpu_to_le32(request->ssids[i].ssid_len); - memcpy(ssid_le.SSID, request->ssids[i].ssid, - request->ssids[i].ssid_len); - if (!ssid_le.SSID_len) - brcmf_dbg(SCAN, "%d: Broadcast scan\n", i); - else - brcmf_dbg(SCAN, "%d: scan for %.32s size=%d\n", - i, ssid_le.SSID, ssid_le.SSID_len); - memcpy(ptr, &ssid_le, sizeof(ssid_le)); - ptr += sizeof(ssid_le); - } - } else { - brcmf_dbg(SCAN, "Performing passive scan\n"); - params_le->scan_type = BRCMF_SCANTYPE_PASSIVE; - } - /* Adding mask to channel numbers */ - params_le->channel_num = - cpu_to_le32((n_ssids << BRCMF_SCAN_PARAMS_NSSID_SHIFT) | - (n_channels & BRCMF_SCAN_PARAMS_COUNT_MASK)); -} - static s32 brcmf_run_escan(struct brcmf_cfg80211_info *cfg, struct brcmf_if *ifp, struct cfg80211_scan_request *request) { struct brcmf_pub *drvr = cfg->pub; - s32 params_size = BRCMF_SCAN_PARAMS_FIXED_SIZE + - offsetof(struct brcmf_escan_params_le, params_le); + s32 params_size = BRCMF_SCAN_PARAMS_V2_FIXED_SIZE + + offsetof(struct brcmf_escan_params_le, params_v2_le); struct brcmf_escan_params_le *params; s32 err = 0; @@ -1391,8 +1446,22 @@ brcmf_run_escan(struct brcmf_cfg80211_info *cfg, struct brcmf_if *ifp, goto exit; } BUG_ON(params_size + sizeof("escan") >= BRCMF_DCMD_MEDLEN); - brcmf_escan_prep(cfg, ¶ms->params_le, request); - params->version = cpu_to_le32(BRCMF_ESCAN_REQ_VERSION); + brcmf_escan_prep(cfg, ¶ms->params_v2_le, request); + + params->version = cpu_to_le32(BRCMF_ESCAN_REQ_VERSION_V2); + + if (!brcmf_feat_is_enabled(ifp, BRCMF_FEAT_SCAN_V2)) { + struct brcmf_escan_params_le *params_v1; + + params_size -= BRCMF_SCAN_PARAMS_V2_FIXED_SIZE; + params_size += BRCMF_SCAN_PARAMS_FIXED_SIZE; + params_v1 = kzalloc(params_size, GFP_KERNEL); + params_v1->version = cpu_to_le32(BRCMF_ESCAN_REQ_VERSION); + brcmf_scan_params_v2_to_v1(¶ms->params_v2_le, ¶ms_v1->params_le); + kfree(params); + params = params_v1; + } + params->action = cpu_to_le16(WL_ESCAN_ACTION_START); params->sync_id = cpu_to_le16(0x1234); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c index 10bac865d7243..b6797f800e55a 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c @@ -290,6 +290,7 @@ void brcmf_feat_attach(struct brcmf_pub *drvr) ifp->drvr->feat_flags |= BIT(BRCMF_FEAT_SCAN_RANDOM_MAC); brcmf_feat_iovar_int_get(ifp, BRCMF_FEAT_FWSUP, "sup_wpa"); + brcmf_feat_iovar_int_get(ifp, BRCMF_FEAT_SCAN_V2, "scan_ver"); if (drvr->settings->feature_disable) { brcmf_dbg(INFO, "Features: 0x%02x, disable: 0x%02x\n", diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.h index 
f1b086a69d735..549298c55b558 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.h @@ -30,6 +30,7 @@ * SAE: simultaneous authentication of equals * FWAUTH: Firmware authenticator * DUMP_OBSS: Firmware has capable to dump obss info to support ACS + * SCAN_V2: Version 2 scan params */ #define BRCMF_FEAT_LIST \ BRCMF_FEAT_DEF(MBSS) \ @@ -53,7 +54,8 @@ BRCMF_FEAT_DEF(DOT11H) \ BRCMF_FEAT_DEF(SAE) \ BRCMF_FEAT_DEF(FWAUTH) \ - BRCMF_FEAT_DEF(DUMP_OBSS) + BRCMF_FEAT_DEF(DUMP_OBSS) \ + BRCMF_FEAT_DEF(SCAN_V2) /* * Quirks: diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h index 04e1beedfd81f..b3844d0d1adbc 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h @@ -48,6 +48,10 @@ /* size of brcmf_scan_params not including variable length array */ #define BRCMF_SCAN_PARAMS_FIXED_SIZE 64 +#define BRCMF_SCAN_PARAMS_V2_FIXED_SIZE 72 + +/* version of brcmf_scan_params structure */ +#define BRCMF_SCAN_PARAMS_VERSION_V2 2 /* masks for channel and ssid count */ #define BRCMF_SCAN_PARAMS_COUNT_MASK 0x0000ffff @@ -67,6 +71,7 @@ #define BRCMF_PRIMARY_KEY (1 << 1) #define DOT11_BSSTYPE_ANY 2 #define BRCMF_ESCAN_REQ_VERSION 1 +#define BRCMF_ESCAN_REQ_VERSION_V2 2 #define BRCMF_MAXRATES_IN_SET 16 /* max # of rates in rateset */ @@ -386,6 +391,45 @@ struct brcmf_scan_params_le { __le16 channel_list[1]; /* list of chanspecs */ }; +struct brcmf_scan_params_v2_le { + __le16 version; /* structure version */ + __le16 length; /* structure length */ + struct brcmf_ssid_le ssid_le; /* default: {0, ""} */ + u8 bssid[ETH_ALEN]; /* default: bcast */ + s8 bss_type; /* default: any, + * DOT11_BSSTYPE_ANY/INFRASTRUCTURE/INDEPENDENT + */ + u8 pad; + __le32 scan_type; /* flags, 0 use default */ + __le32 nprobes; /* -1 use default, number of probes per channel */ + __le32 active_time; /* -1 use default, dwell time per channel for + * active scanning + */ + __le32 passive_time; /* -1 use default, dwell time per channel + * for passive scanning + */ + __le32 home_time; /* -1 use default, dwell time for the + * home channel between channel scans + */ + __le32 channel_num; /* count of channels and ssids that follow + * + * low half is count of channels in + * channel_list, 0 means default (use all + * available channels) + * + * high half is entries in struct brcmf_ssid + * array that follows channel_list, aligned for + * s32 (4 bytes) meaning an odd channel count + * implies a 2-byte pad between end of + * channel_list and first ssid + * + * if ssid count is zero, single ssid in the + * fixed parameter portion is assumed, otherwise + * ssid in the fixed portion is ignored + */ + __le16 channel_list[1]; /* list of chanspecs */ +}; + struct brcmf_scan_results { u32 buflen; u32 version; @@ -397,7 +441,10 @@ struct brcmf_escan_params_le { __le32 version; __le16 action; __le16 sync_id; - struct brcmf_scan_params_le params_le; + union { + struct brcmf_scan_params_le params_le; + struct brcmf_scan_params_v2_le params_v2_le; + }; }; struct brcmf_escan_result_le { -- GitLab From d75ef1f81e42dfccfeb97952e2ab70376832cf7a Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Tue, 14 Feb 2023 18:24:17 +0900 Subject: [PATCH 0065/2078] wifi: brcmfmac: feature: Add support for setting feats based on WLC version The "wlc_ver" iovar returns information on the WLC and EPI versions. 
This can be used to determine whether the PMKID_V2 and _V3 features are supported. Reviewed-by: Linus Walleij Acked-by: Arend van Spriel Signed-off-by: Hector Martin Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230214092423.15175-4-marcan@marcan.st --- .../broadcom/brcm80211/brcmfmac/feature.c | 48 +++++++++++++++++++ .../broadcom/brcm80211/brcmfmac/feature.h | 4 +- .../broadcom/brcm80211/brcmfmac/fwil_types.h | 25 ++++++++++ 3 files changed, 76 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c index b6797f800e55a..6d10c9efbe93d 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c @@ -126,6 +126,53 @@ static void brcmf_feat_firmware_overrides(struct brcmf_pub *drv) drv->feat_flags |= feat_flags; } +struct brcmf_feat_wlcfeat { + u16 min_ver_major; + u16 min_ver_minor; + u32 feat_flags; +}; + +static const struct brcmf_feat_wlcfeat brcmf_feat_wlcfeat_map[] = { + { 12, 0, BIT(BRCMF_FEAT_PMKID_V2) }, + { 13, 0, BIT(BRCMF_FEAT_PMKID_V3) }, +}; + +static void brcmf_feat_wlc_version_overrides(struct brcmf_pub *drv) +{ + struct brcmf_if *ifp = brcmf_get_ifp(drv, 0); + const struct brcmf_feat_wlcfeat *e; + struct brcmf_wlc_version_le ver; + u32 feat_flags = 0; + int i, err, major, minor; + + err = brcmf_fil_iovar_data_get(ifp, "wlc_ver", &ver, sizeof(ver)); + if (err) + return; + + major = le16_to_cpu(ver.wlc_ver_major); + minor = le16_to_cpu(ver.wlc_ver_minor); + + brcmf_dbg(INFO, "WLC version: %d.%d\n", major, minor); + + for (i = 0; i < ARRAY_SIZE(brcmf_feat_wlcfeat_map); i++) { + e = &brcmf_feat_wlcfeat_map[i]; + if (major > e->min_ver_major || + (major == e->min_ver_major && + minor >= e->min_ver_minor)) { + feat_flags |= e->feat_flags; + } + } + + if (!feat_flags) + return; + + for (i = 0; i < BRCMF_FEAT_LAST; i++) + if (feat_flags & BIT(i)) + brcmf_dbg(INFO, "enabling firmware feature: %s\n", + brcmf_feat_names[i]); + drv->feat_flags |= feat_flags; +} + /** * brcmf_feat_iovar_int_get() - determine feature through iovar query. * @@ -299,6 +346,7 @@ void brcmf_feat_attach(struct brcmf_pub *drvr) ifp->drvr->feat_flags &= ~drvr->settings->feature_disable; } + brcmf_feat_wlc_version_overrides(drvr); brcmf_feat_firmware_overrides(drvr); /* set chip related quirks */ diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.h index 549298c55b558..7f4f0b3e4a7b4 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.h @@ -55,7 +55,9 @@ BRCMF_FEAT_DEF(SAE) \ BRCMF_FEAT_DEF(FWAUTH) \ BRCMF_FEAT_DEF(DUMP_OBSS) \ - BRCMF_FEAT_DEF(SCAN_V2) + BRCMF_FEAT_DEF(SCAN_V2) \ + BRCMF_FEAT_DEF(PMKID_V2) \ + BRCMF_FEAT_DEF(PMKID_V3) /* * Quirks: diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h index b3844d0d1adbc..801709c26b7bf 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h @@ -788,6 +788,31 @@ struct brcmf_rev_info_le { __le32 nvramrev; }; +/** + * struct brcmf_wlc_version_le - firmware revision info. + * + * @version: structure version. + * @length: structure length. 
+ * @epi_ver_major: EPI major version + * @epi_ver_minor: EPI minor version + * @epi_ver_rc: EPI rc version + * @epi_ver_incr: EPI increment version + * @wlc_ver_major: WLC major version + * @wlc_ver_minor: WLC minor version + */ +struct brcmf_wlc_version_le { + __le16 version; + __le16 length; + + __le16 epi_ver_major; + __le16 epi_ver_minor; + __le16 epi_ver_rc; + __le16 epi_ver_incr; + + __le16 wlc_ver_major; + __le16 wlc_ver_minor; +}; + /** * struct brcmf_assoclist_le - request assoc list. * -- GitLab From a96202acaea47fa8377088e0952bb63bd02a3bab Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Tue, 14 Feb 2023 18:24:18 +0900 Subject: [PATCH 0066/2078] wifi: brcmfmac: cfg80211: Add support for PMKID_V3 operations Add support for the new PMKID_V3 API, which allows performing PMKID mutations individually, instead of requiring the driver to keep track of the full list. This new API is required by at least BCM4387. Note that PMKID_V2 is not implemented yet. Reviewed-by: Linus Walleij Reviewed-by: Arend van Spriel Signed-off-by: Hector Martin Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230214092423.15175-5-marcan@marcan.st --- .../broadcom/brcm80211/brcmfmac/cfg80211.c | 52 +++++++++++- .../broadcom/brcm80211/brcmfmac/fwil_types.h | 83 +++++++++++++++++++ 2 files changed, 132 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 77706e204a8f3..b561ca6b467e8 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -4306,6 +4306,37 @@ exit: return 0; } +static s32 +brcmf_pmksa_v3_op(struct brcmf_if *ifp, struct cfg80211_pmksa *pmksa, + bool alive) +{ + struct brcmf_pmk_op_v3_le *pmk_op; + int length = offsetof(struct brcmf_pmk_op_v3_le, pmk); + int ret; + + pmk_op = kzalloc(sizeof(*pmk_op), GFP_KERNEL); + pmk_op->version = cpu_to_le16(BRCMF_PMKSA_VER_3); + + if (!pmksa) { + /* Flush operation, operate on entire list */ + pmk_op->count = cpu_to_le16(0); + } else { + /* Single PMK operation */ + pmk_op->count = cpu_to_le16(1); + length += sizeof(struct brcmf_pmksa_v3); + memcpy(pmk_op->pmk[0].bssid, pmksa->bssid, ETH_ALEN); + memcpy(pmk_op->pmk[0].pmkid, pmksa->pmkid, WLAN_PMKID_LEN); + pmk_op->pmk[0].pmkid_len = WLAN_PMKID_LEN; + pmk_op->pmk[0].time_left = cpu_to_le32(alive ? 
BRCMF_PMKSA_NO_EXPIRY : 0); + } + + pmk_op->length = cpu_to_le16(length); + + ret = brcmf_fil_iovar_data_set(ifp, "pmkid_info", pmk_op, sizeof(*pmk_op)); + kfree(pmk_op); + return ret; +} + static __used s32 brcmf_update_pmklist(struct brcmf_cfg80211_info *cfg, struct brcmf_if *ifp) { @@ -4339,6 +4370,14 @@ brcmf_cfg80211_set_pmksa(struct wiphy *wiphy, struct net_device *ndev, if (!check_vif_up(ifp->vif)) return -EIO; + brcmf_dbg(CONN, "set_pmksa - PMK bssid: %pM =\n", pmksa->bssid); + brcmf_dbg(CONN, "%*ph\n", WLAN_PMKID_LEN, pmksa->pmkid); + + if (brcmf_feat_is_enabled(ifp, BRCMF_FEAT_PMKID_V3)) + return brcmf_pmksa_v3_op(ifp, pmksa, true); + + /* TODO: implement PMKID_V2 */ + npmk = le32_to_cpu(cfg->pmk_list.npmk); for (i = 0; i < npmk; i++) if (!memcmp(pmksa->bssid, pmk[i].bssid, ETH_ALEN)) @@ -4355,9 +4394,6 @@ brcmf_cfg80211_set_pmksa(struct wiphy *wiphy, struct net_device *ndev, return -EINVAL; } - brcmf_dbg(CONN, "set_pmksa - PMK bssid: %pM =\n", pmk[npmk].bssid); - brcmf_dbg(CONN, "%*ph\n", WLAN_PMKID_LEN, pmk[npmk].pmkid); - err = brcmf_update_pmklist(cfg, ifp); brcmf_dbg(TRACE, "Exit\n"); @@ -4381,6 +4417,11 @@ brcmf_cfg80211_del_pmksa(struct wiphy *wiphy, struct net_device *ndev, brcmf_dbg(CONN, "del_pmksa - PMK bssid = %pM\n", pmksa->bssid); + if (brcmf_feat_is_enabled(ifp, BRCMF_FEAT_PMKID_V3)) + return brcmf_pmksa_v3_op(ifp, pmksa, false); + + /* TODO: implement PMKID_V2 */ + npmk = le32_to_cpu(cfg->pmk_list.npmk); for (i = 0; i < npmk; i++) if (!memcmp(pmksa->bssid, pmk[i].bssid, ETH_ALEN)) @@ -4417,6 +4458,11 @@ brcmf_cfg80211_flush_pmksa(struct wiphy *wiphy, struct net_device *ndev) if (!check_vif_up(ifp->vif)) return -EIO; + if (brcmf_feat_is_enabled(ifp, BRCMF_FEAT_PMKID_V3)) + return brcmf_pmksa_v3_op(ifp, NULL, false); + + /* TODO: implement PMKID_V2 */ + memset(&cfg->pmk_list, 0, sizeof(cfg->pmk_list)); err = brcmf_update_pmklist(cfg, ifp); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h index 801709c26b7bf..792adaf880b44 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h @@ -174,6 +174,10 @@ #define BRCMF_HE_CAP_MCS_MAP_NSS_MAX 8 +#define BRCMF_PMKSA_VER_2 2 +#define BRCMF_PMKSA_VER_3 3 +#define BRCMF_PMKSA_NO_EXPIRY 0xffffffff + /* MAX_CHUNK_LEN is the maximum length for data passing to firmware in each * ioctl. It is relatively small because firmware has small maximum size input * playload restriction for ioctls. @@ -355,6 +359,12 @@ struct brcmf_ssid_le { unsigned char SSID[IEEE80211_MAX_SSID_LEN]; }; +/* Alternate SSID structure used in some places... */ +struct brcmf_ssid8_le { + u8 SSID_len; + unsigned char SSID[IEEE80211_MAX_SSID_LEN]; +}; + struct brcmf_scan_params_le { struct brcmf_ssid_le ssid_le; /* default: {0, ""} */ u8 bssid[ETH_ALEN]; /* default: bcast */ @@ -875,6 +885,51 @@ struct brcmf_pmksa { u8 pmkid[WLAN_PMKID_LEN]; }; +/** + * struct brcmf_pmksa_v2 - PMK Security Association + * + * @length: Length of the structure. + * @bssid: The AP's BSSID. + * @pmkid: The PMK ID. + * @pmk: PMK material for FILS key derivation. + * @pmk_len: Length of PMK data. + * @ssid: The AP's SSID. 
+ * @fils_cache_id: FILS cache identifier + */ +struct brcmf_pmksa_v2 { + __le16 length; + u8 bssid[ETH_ALEN]; + u8 pmkid[WLAN_PMKID_LEN]; + u8 pmk[WLAN_PMK_LEN_SUITE_B_192]; + __le16 pmk_len; + struct brcmf_ssid8_le ssid; + u16 fils_cache_id; +}; + +/** + * struct brcmf_pmksa_v3 - PMK Security Association + * + * @bssid: The AP's BSSID. + * @pmkid: The PMK ID. + * @pmkid_len: The length of the PMK ID. + * @pmk: PMK material for FILS key derivation. + * @pmk_len: Length of PMK data. + * @fils_cache_id: FILS cache identifier + * @ssid: The AP's SSID. + * @time_left: Remaining time until expiry. 0 = expired, ~0 = no expiry. + */ +struct brcmf_pmksa_v3 { + u8 bssid[ETH_ALEN]; + u8 pmkid[WLAN_PMKID_LEN]; + u8 pmkid_len; + u8 pmk[WLAN_PMK_LEN_SUITE_B_192]; + u8 pmk_len; + __le16 fils_cache_id; + u8 pad; + struct brcmf_ssid8_le ssid; + __le32 time_left; +}; + /** * struct brcmf_pmk_list_le - List of pmksa's. * @@ -886,6 +941,34 @@ struct brcmf_pmk_list_le { struct brcmf_pmksa pmk[BRCMF_MAXPMKID]; }; +/** + * struct brcmf_pmk_list_v2_le - List of pmksa's. + * + * @version: Request version. + * @length: Length of this structure. + * @pmk: PMK SA information. + */ +struct brcmf_pmk_list_v2_le { + __le16 version; + __le16 length; + struct brcmf_pmksa_v2 pmk[BRCMF_MAXPMKID]; +}; + +/** + * struct brcmf_pmk_op_v3_le - Operation on PMKSA list. + * + * @version: Request version. + * @length: Length of this structure. + * @pmk: PMK SA information. + */ +struct brcmf_pmk_op_v3_le { + __le16 version; + __le16 length; + __le16 count; + __le16 pad; + struct brcmf_pmksa_v3 pmk[BRCMF_MAXPMKID]; +}; + /** * struct brcmf_pno_param_le - PNO scan configuration parameters * -- GitLab From 89b89e52153fda2733562776c7c9d9d3ebf8dd6d Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Tue, 14 Feb 2023 18:24:19 +0900 Subject: [PATCH 0067/2078] wifi: brcmfmac: cfg80211: Pass the PMK in binary instead of hex Apparently the hex passphrase mechanism does not work on newer chips/firmware (e.g. BCM4387). It seems there was a simple way of passing it in binary all along, so use that and avoid the hexification. OpenBSD has been doing it like this from the beginning, so this should work on all chips. Also clear the structure before setting the PMK. This was leaking uninitialized stack contents to the device. 
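The encoding change can be sketched for a 32-byte PMK as follows. This is only an illustration of the hunk below, reusing the driver's existing struct, field, and flag names; the key contents are invented.

    /* old: ASCII-hex "passphrase", twice the key length */
    pmk.key_len = cpu_to_le16(32 << 1);                /* 64 bytes of text */
    pmk.flags   = cpu_to_le16(BRCMF_WSEC_PASSPHRASE);
    /* pmk.key[] ends up as "8f3a..." hex text, which newer firmware rejects */

    /* new: raw key bytes, flags cleared, structure zeroed first so no
     * stack garbage reaches the dongle
     */
    memset(&pmk, 0, sizeof(pmk));
    pmk.key_len = cpu_to_le16(32);
    pmk.flags   = cpu_to_le16(0);
    memcpy(pmk.key, pmk_data, 32);
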
Reviewed-by: Linus Walleij Reviewed-by: Arend van Spriel Signed-off-by: Hector Martin Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230214092423.15175-6-marcan@marcan.st --- .../wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index b561ca6b467e8..e0a70a671550c 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -1686,13 +1686,14 @@ static int brcmf_set_pmk(struct brcmf_if *ifp, const u8 *pmk_data, u16 pmk_len) { struct brcmf_pub *drvr = ifp->drvr; struct brcmf_wsec_pmk_le pmk; - int i, err; + int err; + + memset(&pmk, 0, sizeof(pmk)); - /* convert to firmware key format */ - pmk.key_len = cpu_to_le16(pmk_len << 1); - pmk.flags = cpu_to_le16(BRCMF_WSEC_PASSPHRASE); - for (i = 0; i < pmk_len; i++) - snprintf(&pmk.key[2 * i], 3, "%02x", pmk_data[i]); + /* pass pmk directly */ + pmk.key_len = cpu_to_le16(pmk_len); + pmk.flags = cpu_to_le16(0); + memcpy(pmk.key, pmk_data, pmk_len); /* store psk in firmware */ err = brcmf_fil_cmd_data_set(ifp, BRCMF_C_SET_WSEC_PMK, -- GitLab From 117ace4014cce3fb78b40eb8028bb0f4fc37dd6f Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Tue, 14 Feb 2023 18:24:20 +0900 Subject: [PATCH 0068/2078] wifi: brcmfmac: pcie: Add IDs/properties for BCM4387 This chip is present on Apple M1 Pro/Max (t600x) platforms: * maldives (apple,j314s): MacBook Pro (14-inch, M1 Pro, 2021) * maldives (apple,j314c): MacBook Pro (14-inch, M1 Max, 2021) * madagascar (apple,j316s): MacBook Pro (16-inch, M1 Pro, 2021) * madagascar (apple,j316c): MacBook Pro (16-inch, M1 Max, 2021) Reviewed-by: Linus Walleij Reviewed-by: Arend van Spriel Signed-off-by: Hector Martin Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230214092423.15175-7-marcan@marcan.st --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c | 2 ++ drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c | 8 ++++++++ .../net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h | 2 ++ 3 files changed, 12 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c index 50e0c015cf43a..9f9bf08a70bb4 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c @@ -743,6 +743,8 @@ static u32 brcmf_chip_tcm_rambase(struct brcmf_chip_priv *ci) return 0x170000; case BRCM_CC_4378_CHIP_ID: return 0x352000; + case BRCM_CC_4387_CHIP_ID: + return 0x740000; default: brcmf_err("unknown chip: %s\n", ci->pub.name); break; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c index 2835ef4edb18f..d2dad5414f396 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c @@ -67,6 +67,7 @@ BRCMF_FW_DEF(4366C, "brcmfmac4366c-pcie"); BRCMF_FW_DEF(4371, "brcmfmac4371-pcie"); BRCMF_FW_CLM_DEF(4377B3, "brcmfmac4377b3-pcie"); BRCMF_FW_CLM_DEF(4378B1, "brcmfmac4378b1-pcie"); +BRCMF_FW_CLM_DEF(4387C2, "brcmfmac4387c2-pcie"); /* firmware config files */ MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-pcie.txt"); @@ -101,6 +102,7 @@ static const struct brcmf_firmware_mapping brcmf_pcie_fwnames[] = { BRCMF_FW_ENTRY(BRCM_CC_4371_CHIP_ID, 0xFFFFFFFF, 4371), 
BRCMF_FW_ENTRY(BRCM_CC_4377_CHIP_ID, 0xFFFFFFFF, 4377B3), /* revision ID 4 */ BRCMF_FW_ENTRY(BRCM_CC_4378_CHIP_ID, 0xFFFFFFFF, 4378B1), /* revision ID 3 */ + BRCMF_FW_ENTRY(BRCM_CC_4387_CHIP_ID, 0xFFFFFFFF, 4387C2), /* revision ID 7 */ }; #define BRCMF_PCIE_FW_UP_TIMEOUT 5000 /* msec */ @@ -2048,6 +2050,11 @@ static int brcmf_pcie_read_otp(struct brcmf_pciedev_info *devinfo) base = 0x1120; words = 0x170; break; + case BRCM_CC_4387_CHIP_ID: + coreid = BCMA_CORE_GCI; + base = 0x113c; + words = 0x170; + break; default: /* OTP not supported on this chip */ return 0; @@ -2662,6 +2669,7 @@ static const struct pci_device_id brcmf_pcie_devid_table[] = { BRCMF_PCIE_DEVICE(BRCM_PCIE_43596_DEVICE_ID, CYW), BRCMF_PCIE_DEVICE(BRCM_PCIE_4377_DEVICE_ID, WCC), BRCMF_PCIE_DEVICE(BRCM_PCIE_4378_DEVICE_ID, WCC), + BRCMF_PCIE_DEVICE(BRCM_PCIE_4387_DEVICE_ID, WCC), { /* end: all zeroes */ } }; diff --git a/drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h b/drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h index 896615f579522..44684bf1b9acc 100644 --- a/drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h +++ b/drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h @@ -54,6 +54,7 @@ #define BRCM_CC_4371_CHIP_ID 0x4371 #define BRCM_CC_4377_CHIP_ID 0x4377 #define BRCM_CC_4378_CHIP_ID 0x4378 +#define BRCM_CC_4387_CHIP_ID 0x4387 #define CY_CC_4373_CHIP_ID 0x4373 #define CY_CC_43012_CHIP_ID 43012 #define CY_CC_43439_CHIP_ID 43439 @@ -95,6 +96,7 @@ #define BRCM_PCIE_43596_DEVICE_ID 0x4415 #define BRCM_PCIE_4377_DEVICE_ID 0x4488 #define BRCM_PCIE_4378_DEVICE_ID 0x4425 +#define BRCM_PCIE_4387_DEVICE_ID 0x4433 /* brcmsmac IDs */ #define BCM4313_D11N2G_ID 0x4727 /* 4313 802.11n 2.4G device */ -- GitLab From dd7e55401fec58a2d03e5fdcb0c6d20e8fbe450a Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Tue, 14 Feb 2023 18:24:21 +0900 Subject: [PATCH 0069/2078] wifi: brcmfmac: common: Add support for downloading TxCap blobs The TxCap blobs are additional data blobs used on Apple devices, and are uploaded analogously to CLM blobs. Add core support for doing this. 
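As a rough usage sketch (condensing what the hunks below do; firmware lookup and error handling omitted), the existing CLM path and the new TxCap path become two calls to the same generalized download helper, differing only in the blob and the pair of iovars used for the upload and its status readback:

    err = brcmf_c_download_blob(ifp, fw->data, fw->size,
                                "clmload", "clmload_status");
    err = brcmf_c_download_blob(ifp, fw->data, fw->size,
                                "txcapload", "txcapload_status");
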
Acked-by: Linus Walleij Reviewed-by: Arend van Spriel Signed-off-by: Hector Martin Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230214092423.15175-8-marcan@marcan.st --- .../broadcom/brcm80211/brcmfmac/bus.h | 1 + .../broadcom/brcm80211/brcmfmac/common.c | 93 ++++++++++++++----- 2 files changed, 70 insertions(+), 24 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h index 501136e011b55..fe31051a9e11b 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h @@ -55,6 +55,7 @@ enum brcmf_bus_protocol_type { /* Firmware blobs that may be available */ enum brcmf_blob_type { BRCMF_BLOB_CLM, + BRCMF_BLOB_TXCAP, }; struct brcmf_mp_device; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c index 4fea6f0ea8c88..88e81b83af9d6 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c @@ -101,7 +101,7 @@ void brcmf_c_set_joinpref_default(struct brcmf_if *ifp) static int brcmf_c_download(struct brcmf_if *ifp, u16 flag, struct brcmf_dload_data_le *dload_buf, - u32 len) + u32 len, const char *var) { s32 err; @@ -111,18 +111,18 @@ static int brcmf_c_download(struct brcmf_if *ifp, u16 flag, dload_buf->len = cpu_to_le32(len); dload_buf->crc = cpu_to_le32(0); - err = brcmf_fil_iovar_data_set(ifp, "clmload", dload_buf, + err = brcmf_fil_iovar_data_set(ifp, var, dload_buf, struct_size(dload_buf, data, len)); return err; } -static int brcmf_c_process_clm_blob(struct brcmf_if *ifp) +static int brcmf_c_download_blob(struct brcmf_if *ifp, + const void *data, size_t size, + const char *loadvar, const char *statvar) { struct brcmf_pub *drvr = ifp->drvr; - struct brcmf_bus *bus = drvr->bus_if; struct brcmf_dload_data_le *chunk_buf; - const struct firmware *clm = NULL; u32 chunk_len; u32 datalen; u32 cumulative_len; @@ -132,21 +132,14 @@ static int brcmf_c_process_clm_blob(struct brcmf_if *ifp) brcmf_dbg(TRACE, "Enter\n"); - err = brcmf_bus_get_blob(bus, &clm, BRCMF_BLOB_CLM); - if (err || !clm) { - brcmf_info("no clm_blob available (err=%d), device may have limited channels available\n", - err); - return 0; - } - chunk_buf = kzalloc(struct_size(chunk_buf, data, MAX_CHUNK_LEN), GFP_KERNEL); if (!chunk_buf) { err = -ENOMEM; - goto done; + return -ENOMEM; } - datalen = clm->size; + datalen = size; cumulative_len = 0; do { if (datalen > MAX_CHUNK_LEN) { @@ -155,9 +148,10 @@ static int brcmf_c_process_clm_blob(struct brcmf_if *ifp) chunk_len = datalen; dl_flag |= DL_END; } - memcpy(chunk_buf->data, clm->data + cumulative_len, chunk_len); + memcpy(chunk_buf->data, data + cumulative_len, chunk_len); - err = brcmf_c_download(ifp, dl_flag, chunk_buf, chunk_len); + err = brcmf_c_download(ifp, dl_flag, chunk_buf, chunk_len, + loadvar); dl_flag &= ~DL_BEGIN; @@ -166,20 +160,64 @@ static int brcmf_c_process_clm_blob(struct brcmf_if *ifp) } while ((datalen > 0) && (err == 0)); if (err) { - bphy_err(drvr, "clmload (%zu byte file) failed (%d)\n", - clm->size, err); - /* Retrieve clmload_status and print */ - err = brcmf_fil_iovar_int_get(ifp, "clmload_status", &status); + bphy_err(drvr, "%s (%zu byte file) failed (%d)\n", + loadvar, size, err); + /* Retrieve status and print */ + err = brcmf_fil_iovar_int_get(ifp, statvar, &status); if (err) - bphy_err(drvr, "get clmload_status failed (%d)\n", err); + 
bphy_err(drvr, "get %s failed (%d)\n", statvar, err); else - brcmf_dbg(INFO, "clmload_status=%d\n", status); + brcmf_dbg(INFO, "%s=%d\n", statvar, status); err = -EIO; } kfree(chunk_buf); -done: - release_firmware(clm); + return err; +} + +static int brcmf_c_process_clm_blob(struct brcmf_if *ifp) +{ + struct brcmf_pub *drvr = ifp->drvr; + struct brcmf_bus *bus = drvr->bus_if; + const struct firmware *fw = NULL; + s32 err; + + brcmf_dbg(TRACE, "Enter\n"); + + err = brcmf_bus_get_blob(bus, &fw, BRCMF_BLOB_CLM); + if (err || !fw) { + brcmf_info("no clm_blob available (err=%d), device may have limited channels available\n", + err); + return 0; + } + + err = brcmf_c_download_blob(ifp, fw->data, fw->size, + "clmload", "clmload_status"); + + release_firmware(fw); + return err; +} + +static int brcmf_c_process_txcap_blob(struct brcmf_if *ifp) +{ + struct brcmf_pub *drvr = ifp->drvr; + struct brcmf_bus *bus = drvr->bus_if; + const struct firmware *fw = NULL; + s32 err; + + brcmf_dbg(TRACE, "Enter\n"); + + err = brcmf_bus_get_blob(bus, &fw, BRCMF_BLOB_TXCAP); + if (err || !fw) { + brcmf_info("no txcap_blob available (err=%d)\n", err); + return 0; + } + + brcmf_info("TxCap blob found, loading\n"); + err = brcmf_c_download_blob(ifp, fw->data, fw->size, + "txcapload", "txcapload_status"); + + release_firmware(fw); return err; } @@ -291,6 +329,13 @@ int brcmf_c_preinit_dcmds(struct brcmf_if *ifp) goto done; } + /* Do TxCap downloading, if needed */ + err = brcmf_c_process_txcap_blob(ifp); + if (err < 0) { + bphy_err(drvr, "download TxCap blob file failed, %d\n", err); + goto done; + } + /* query for 'ver' to get version info from firmware */ memset(buf, 0, sizeof(buf)); err = brcmf_fil_iovar_data_get(ifp, "ver", buf, sizeof(buf)); -- GitLab From 75102b7543ed87cf7d599a382e7340e572da6987 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Tue, 14 Feb 2023 18:24:22 +0900 Subject: [PATCH 0070/2078] wifi: brcmfmac: pcie: Load and provide TxCap blobs These blobs are named .txcap_blob, and exist alongside the existing .clm_blob files. Use the existing firmware machinery to provide them to the core. 
Reviewed-by: Linus Walleij Reviewed-by: Arend van Spriel Signed-off-by: Hector Martin Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230214092423.15175-9-marcan@marcan.st --- .../net/wireless/broadcom/brcm80211/brcmfmac/pcie.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c index d2dad5414f396..0fcdd84bbed0d 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c @@ -76,6 +76,7 @@ MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-pcie.*.txt"); /* per-board firmware binaries */ MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-pcie.*.bin"); MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-pcie.*.clm_blob"); +MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-pcie.*.txcap_blob"); static const struct brcmf_firmware_mapping brcmf_pcie_fwnames[] = { BRCMF_FW_ENTRY(BRCM_CC_43602_CHIP_ID, 0xFFFFFFFF, 43602), @@ -329,7 +330,9 @@ struct brcmf_pciedev_info { char fw_name[BRCMF_FW_NAME_LEN]; char nvram_name[BRCMF_FW_NAME_LEN]; char clm_name[BRCMF_FW_NAME_LEN]; + char txcap_name[BRCMF_FW_NAME_LEN]; const struct firmware *clm_fw; + const struct firmware *txcap_fw; const struct brcmf_pcie_reginfo *reginfo; void __iomem *regs; void __iomem *tcm; @@ -1520,6 +1523,10 @@ static int brcmf_pcie_get_blob(struct device *dev, const struct firmware **fw, *fw = devinfo->clm_fw; devinfo->clm_fw = NULL; break; + case BRCMF_BLOB_TXCAP: + *fw = devinfo->txcap_fw; + devinfo->txcap_fw = NULL; + break; default: return -ENOENT; } @@ -2112,6 +2119,7 @@ static int brcmf_pcie_read_otp(struct brcmf_pciedev_info *devinfo) #define BRCMF_PCIE_FW_CODE 0 #define BRCMF_PCIE_FW_NVRAM 1 #define BRCMF_PCIE_FW_CLM 2 +#define BRCMF_PCIE_FW_TXCAP 3 static void brcmf_pcie_setup(struct device *dev, int ret, struct brcmf_fw_request *fwreq) @@ -2138,6 +2146,7 @@ static void brcmf_pcie_setup(struct device *dev, int ret, nvram = fwreq->items[BRCMF_PCIE_FW_NVRAM].nv_data.data; nvram_len = fwreq->items[BRCMF_PCIE_FW_NVRAM].nv_data.len; devinfo->clm_fw = fwreq->items[BRCMF_PCIE_FW_CLM].binary; + devinfo->txcap_fw = fwreq->items[BRCMF_PCIE_FW_TXCAP].binary; kfree(fwreq); ret = brcmf_chip_get_raminfo(devinfo->ci); @@ -2219,6 +2228,7 @@ brcmf_pcie_prepare_fw_request(struct brcmf_pciedev_info *devinfo) { ".bin", devinfo->fw_name }, { ".txt", devinfo->nvram_name }, { ".clm_blob", devinfo->clm_name }, + { ".txcap_blob", devinfo->txcap_name }, }; fwreq = brcmf_fw_alloc_request(devinfo->ci->chip, devinfo->ci->chiprev, @@ -2233,6 +2243,8 @@ brcmf_pcie_prepare_fw_request(struct brcmf_pciedev_info *devinfo) fwreq->items[BRCMF_PCIE_FW_NVRAM].flags = BRCMF_FW_REQF_OPTIONAL; fwreq->items[BRCMF_PCIE_FW_CLM].type = BRCMF_FW_TYPE_BINARY; fwreq->items[BRCMF_PCIE_FW_CLM].flags = BRCMF_FW_REQF_OPTIONAL; + fwreq->items[BRCMF_PCIE_FW_TXCAP].type = BRCMF_FW_TYPE_BINARY; + fwreq->items[BRCMF_PCIE_FW_TXCAP].flags = BRCMF_FW_REQF_OPTIONAL; /* NVRAM reserves PCI domain 0 for Broadcom's SDK faked bus */ fwreq->domain_nr = pci_domain_nr(devinfo->pdev->bus) + 1; fwreq->bus_nr = devinfo->pdev->bus->number; @@ -2530,6 +2542,7 @@ brcmf_pcie_remove(struct pci_dev *pdev) brcmf_pcie_reset_device(devinfo); brcmf_pcie_release_resource(devinfo); release_firmware(devinfo->clm_fw); + release_firmware(devinfo->txcap_fw); if (devinfo->ci) brcmf_chip_detach(devinfo->ci); -- GitLab From 5b3ee9987f5856080509e62968f812961173d336 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: 
Tue, 14 Feb 2023 18:24:23 +0900 Subject: [PATCH 0071/2078] wifi: brcmfmac: common: Add support for external calibration blobs The calibration blob for a chip is normally stored in SROM and loaded internally by the firmware. However, Apple ARM64 platforms instead store it as part of platform configuration data, and provide it via the Apple Device Tree. We forward this into the Linux DT in the bootloader. Add support for taking this blob from the DT and loading it into the dongle. The loading mechanism is the same as used for the CLM and TxCap blobs. Reviewed-by: Linus Walleij Reviewed-by: Arend van Spriel Signed-off-by: Hector Martin Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230214092423.15175-10-marcan@marcan.st --- .../broadcom/brcm80211/brcmfmac/common.c | 24 +++++++++++++++++++ .../broadcom/brcm80211/brcmfmac/common.h | 2 ++ .../wireless/broadcom/brcm80211/brcmfmac/of.c | 7 ++++++ 3 files changed, 33 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c index 88e81b83af9d6..a194b0e68eb53 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c @@ -246,6 +246,23 @@ static const u8 brcmf_default_mac_address[ETH_ALEN] = { 0x00, 0x90, 0x4c, 0xc5, 0x12, 0x38 }; +static int brcmf_c_process_cal_blob(struct brcmf_if *ifp) +{ + struct brcmf_pub *drvr = ifp->drvr; + struct brcmf_mp_device *settings = drvr->settings; + s32 err; + + brcmf_dbg(TRACE, "Enter\n"); + + if (!settings->cal_blob || !settings->cal_size) + return 0; + + brcmf_info("Calibration blob provided by platform, loading\n"); + err = brcmf_c_download_blob(ifp, settings->cal_blob, settings->cal_size, + "calload", "calload_status"); + return err; +} + int brcmf_c_preinit_dcmds(struct brcmf_if *ifp) { struct brcmf_pub *drvr = ifp->drvr; @@ -336,6 +353,13 @@ int brcmf_c_preinit_dcmds(struct brcmf_if *ifp) goto done; } + /* Download external calibration blob, if available */ + err = brcmf_c_process_cal_blob(ifp); + if (err < 0) { + bphy_err(drvr, "download calibration blob file failed, %d\n", err); + goto done; + } + /* query for 'ver' to get version info from firmware */ memset(buf, 0, sizeof(buf)); err = brcmf_fil_iovar_data_get(ifp, "ver", buf, sizeof(buf)); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.h index 7167fd4f8c639..2be2986d2110a 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.h @@ -54,6 +54,8 @@ struct brcmf_mp_device { const char *board_type; unsigned char mac[ETH_ALEN]; const char *antenna_sku; + const void *cal_blob; + int cal_size; union { struct brcmfmac_sdio_pd sdio; } bus; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c index fdd0c9abc1a10..52527b61341ed 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c @@ -86,6 +86,13 @@ void brcmf_of_probe(struct device *dev, enum brcmf_bus_type bus_type, if (!of_property_read_string(np, "apple,antenna-sku", &prop)) settings->antenna_sku = prop; + /* The WLAN calibration blob is normally stored in SROM, but Apple + * ARM64 platforms pass it via the DT instead. 
+ */ + prop = of_get_property(np, "brcm,cal-blob", &settings->cal_size); + if (prop && settings->cal_size) + settings->cal_blob = prop; + /* Set board-type to the first string of the machine compatible prop */ root = of_find_node_by_path("/"); if (root && err) { -- GitLab From 1d5003d05f983eee28756896328e4949d9a97b7f Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Tue, 14 Feb 2023 18:28:38 +0900 Subject: [PATCH 0072/2078] wifi: brcmfmac: pcie: Add BCM4378B3 support BCM4378B3 is a new silicon revision of BCM4378 present on the Apple M2 13" MacBook Pro "kyushu". Its PCI revision number is 5. Signed-off-by: Hector Martin Reviewed-by: Julian Calaby Reviewed-by: Linus Walleij Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230214092838.17869-1-marcan@marcan.st --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c index 0fcdd84bbed0d..848f05ec1268c 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c @@ -67,6 +67,7 @@ BRCMF_FW_DEF(4366C, "brcmfmac4366c-pcie"); BRCMF_FW_DEF(4371, "brcmfmac4371-pcie"); BRCMF_FW_CLM_DEF(4377B3, "brcmfmac4377b3-pcie"); BRCMF_FW_CLM_DEF(4378B1, "brcmfmac4378b1-pcie"); +BRCMF_FW_CLM_DEF(4378B3, "brcmfmac4378b3-pcie"); BRCMF_FW_CLM_DEF(4387C2, "brcmfmac4387c2-pcie"); /* firmware config files */ @@ -102,7 +103,8 @@ static const struct brcmf_firmware_mapping brcmf_pcie_fwnames[] = { BRCMF_FW_ENTRY(BRCM_CC_43666_CHIP_ID, 0xFFFFFFF0, 4366C), BRCMF_FW_ENTRY(BRCM_CC_4371_CHIP_ID, 0xFFFFFFFF, 4371), BRCMF_FW_ENTRY(BRCM_CC_4377_CHIP_ID, 0xFFFFFFFF, 4377B3), /* revision ID 4 */ - BRCMF_FW_ENTRY(BRCM_CC_4378_CHIP_ID, 0xFFFFFFFF, 4378B1), /* revision ID 3 */ + BRCMF_FW_ENTRY(BRCM_CC_4378_CHIP_ID, 0x0000000F, 4378B1), /* revision ID 3 */ + BRCMF_FW_ENTRY(BRCM_CC_4378_CHIP_ID, 0xFFFFFFE0, 4378B3), /* revision ID 5 */ BRCMF_FW_ENTRY(BRCM_CC_4387_CHIP_ID, 0xFFFFFFFF, 4387C2), /* revision ID 7 */ }; -- GitLab From 29c66ad1c3ad1698becf81182f73fb4bd50bceb2 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Fri, 24 Feb 2023 18:36:53 +0800 Subject: [PATCH 0073/2078] libbpf: Use struct user_pt_regs to define __PT_REGS_CAST() for LoongArch LoongArch provides struct user_pt_regs instead of struct pt_regs to userspace, use struct user_pt_regs to define __PT_REGS_CAST() to fix the following build error: CLNG-BPF [test_maps] loop1.bpf.o progs/loop1.c:22:9: error: incomplete definition of type 'struct pt_regs' m = PT_REGS_RC(ctx); ^~~~~~~~~~~~~~~ tools/testing/selftests/bpf/tools/include/bpf/bpf_tracing.h:493:41: note: expanded from macro 'PT_REGS_RC' #define PT_REGS_RC(x) (__PT_REGS_CAST(x)->__PT_RC_REG) ~~~~~~~~~~~~~~~~~^ tools/testing/selftests/bpf/tools/include/bpf/bpf_helper_defs.h:20:8: note: forward declaration of 'struct pt_regs' struct pt_regs; ^ 1 error generated. 
make: *** [Makefile:572: tools/testing/selftests/bpf/loop1.bpf.o] Error 1 make: Leaving directory 'tools/testing/selftests/bpf' Signed-off-by: Tiezhu Yang Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/1677235015-21717-2-git-send-email-yangtiezhu@loongson.cn --- tools/lib/bpf/bpf_tracing.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index 6db88f41fa0df..137b13de08073 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -415,6 +415,8 @@ struct pt_regs___arm64 { * https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html */ +/* loongarch provides struct user_pt_regs instead of struct pt_regs to userspace */ +#define __PT_REGS_CAST(x) ((const struct user_pt_regs *)(x)) #define __PT_PARM1_REG regs[4] #define __PT_PARM2_REG regs[5] #define __PT_PARM3_REG regs[6] -- GitLab From 84c22fa83f9cb97061359f97d85f53a9ccde02c4 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Fri, 24 Feb 2023 18:36:55 +0800 Subject: [PATCH 0074/2078] selftests/bpf: Use __NR_prlimit64 instead of __NR_getrlimit in user_ringbuf test After commit 80d7da1cac62 ("asm-generic: Drop getrlimit and setrlimit syscalls from default list"), new architectures won't need to include getrlimit and setrlimit, they are superseded with prlimit64. In order to maintain compatibility for the new architectures, such as LoongArch which does not define __NR_getrlimit, it is better to use __NR_prlimit64 instead of __NR_getrlimit in user_ringbuf test to fix the following build error: TEST-OBJ [test_progs] user_ringbuf.test.o tools/testing/selftests/bpf/prog_tests/user_ringbuf.c: In function 'kick_kernel_cb': tools/testing/selftests/bpf/prog_tests/user_ringbuf.c:593:17: error: '__NR_getrlimit' undeclared (first use in this function) 593 | syscall(__NR_getrlimit); | ^~~~~~~~~~~~~~ tools/testing/selftests/bpf/prog_tests/user_ringbuf.c:593:17: note: each undeclared identifier is reported only once for each function it appears in make: *** [Makefile:573: tools/testing/selftests/bpf/user_ringbuf.test.o] Error 1 make: Leaving directory 'tools/testing/selftests/bpf' Signed-off-by: Tiezhu Yang Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/1677235015-21717-4-git-send-email-yangtiezhu@loongson.cn --- tools/testing/selftests/bpf/prog_tests/user_ringbuf.c | 2 +- tools/testing/selftests/bpf/progs/user_ringbuf_success.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c index 3a13e102c1491..e51721df14fc1 100644 --- a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c @@ -590,7 +590,7 @@ static void *kick_kernel_cb(void *arg) /* Kick the kernel, causing it to drain the ring buffer and then wake * up the test thread waiting on epoll. 
*/ - syscall(__NR_getrlimit); + syscall(__NR_prlimit64); return NULL; } diff --git a/tools/testing/selftests/bpf/progs/user_ringbuf_success.c b/tools/testing/selftests/bpf/progs/user_ringbuf_success.c index b39093dd57159..0ade1110613b8 100644 --- a/tools/testing/selftests/bpf/progs/user_ringbuf_success.c +++ b/tools/testing/selftests/bpf/progs/user_ringbuf_success.c @@ -202,7 +202,7 @@ do_nothing_cb(struct bpf_dynptr *dynptr, void *context) return 0; } -SEC("fentry/" SYS_PREFIX "sys_getrlimit") +SEC("fentry/" SYS_PREFIX "sys_prlimit64") int test_user_ringbuf_epoll(void *ctx) { long num_samples; -- GitLab From c679bbd611c08b0559ffae079330bc4e5574696a Mon Sep 17 00:00:00 2001 From: Luis Gerhorst Date: Mon, 27 Feb 2023 16:08:54 +0100 Subject: [PATCH 0075/2078] tools: bpftool: Remove invalid \' json escape RFC8259 ("The JavaScript Object Notation (JSON) Data Interchange Format") only specifies \", \\, \/, \b, \f, \n, \r, and \r as valid two-character escape sequences. This does not include \', which is not required in JSON because it exclusively uses double quotes as string separators. Solidus (/) may be escaped, but does not have to. Only reverse solidus (\), double quotes ("), and the control characters have to be escaped. Therefore, with this fix, bpftool correctly supports all valid two-character escape sequences (but still does not support characters that require multi-character escape sequences). Witout this fix, attempting to load a JSON file generated by bpftool using Python 3.10.6's default json.load() may fail with the error "Invalid \escape" if the file contains the invalid escaped single quote (\'). Fixes: b66e907cfee2 ("tools: bpftool: copy JSON writer from iproute2 repository") Signed-off-by: Luis Gerhorst Signed-off-by: Andrii Nakryiko Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20230227150853.16863-1-gerhorst@cs.fau.de --- tools/bpf/bpftool/json_writer.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/bpf/bpftool/json_writer.c b/tools/bpf/bpftool/json_writer.c index 7fea83bedf488..bca5dd0a59e34 100644 --- a/tools/bpf/bpftool/json_writer.c +++ b/tools/bpf/bpftool/json_writer.c @@ -80,9 +80,6 @@ static void jsonw_puts(json_writer_t *self, const char *str) case '"': fputs("\\\"", self->out); break; - case '\'': - fputs("\\\'", self->out); - break; default: putc(*str, self->out); } -- GitLab From 11e456cae91e9044cb12c2b037b52c9b268925f7 Mon Sep 17 00:00:00 2001 From: Rong Tao Date: Fri, 24 Feb 2023 23:10:02 +0800 Subject: [PATCH 0076/2078] selftests/bpf: Fix compilation errors: Assign a value to a constant Commit bc292ab00f6c("mm: introduce vma->vm_flags wrapper functions") turns the vm_flags into a const variable. Added bpf_find_vma test in commit f108662b27c9("selftests/bpf: Add tests for bpf_find_vma") to assign values to variables that declare const in find_vma_fail1.c programs, which is an error to the compiler and does not test BPF verifiers. It is better to replace 'const vm_flags_t vm_flags' with 'unsigned long vm_start' for testing. $ make -C tools/testing/selftests/bpf/ -j8 ... 
progs/find_vma_fail1.c:16:16: error: cannot assign to non-static data member 'vm_flags' with const-qualified type 'const vm_flags_t' (aka 'const unsigned long') vma->vm_flags |= 0x55; ~~~~~~~~~~~~~ ^ ../tools/testing/selftests/bpf/tools/include/vmlinux.h:1898:20: note: non-static data member 'vm_flags' declared const here const vm_flags_t vm_flags; ~~~~~~~~~~~`~~~~~~^~~~~~~~ Signed-off-by: Rong Tao Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/tencent_CB281722B3C1BD504C16CDE586CACC2BE706@qq.com --- tools/testing/selftests/bpf/progs/find_vma_fail1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/progs/find_vma_fail1.c b/tools/testing/selftests/bpf/progs/find_vma_fail1.c index b3b326b8e2d1c..47d5dedff5540 100644 --- a/tools/testing/selftests/bpf/progs/find_vma_fail1.c +++ b/tools/testing/selftests/bpf/progs/find_vma_fail1.c @@ -13,7 +13,7 @@ static long write_vma(struct task_struct *task, struct vm_area_struct *vma, struct callback_ctx *data) { /* writing to vma, which is illegal */ - vma->vm_flags |= 0x55; + vma->vm_start = 0xffffffffff600000; return 0; } -- GitLab From 06943ae675945c762bb8d5edc93d203f2b041d8d Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Thu, 23 Feb 2023 09:53:46 +0000 Subject: [PATCH 0077/2078] libbpf: Fix arm syscall regs spec in bpf_tracing.h The syscall register definitions for ARM in bpf_tracing.h doesn't define the fifth parameter for the syscalls. Because of this some KPROBES based selftests fail to compile for ARM architecture. Define the fifth parameter that is passed in the R5 register (uregs[4]). Fixes: 3a95c42d65d5 ("libbpf: Define arm syscall regs spec in bpf_tracing.h") Signed-off-by: Puranjay Mohan Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230223095346.10129-1-puranjay12@gmail.com --- tools/lib/bpf/bpf_tracing.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index 137b13de08073..6fb3d0f9af17e 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -204,6 +204,7 @@ struct pt_regs___s390 { #define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG #define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG #define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG +#define __PT_PARM5_SYSCALL_REG uregs[4] #define __PT_PARM6_SYSCALL_REG uregs[5] #define __PT_PARM7_SYSCALL_REG uregs[6] -- GitLab From 0a504fa1a780332760f2a4369a1adc3ee9f620d0 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 21 Feb 2023 00:49:58 +0100 Subject: [PATCH 0078/2078] libbpf: Document bpf_{btf,link,map,prog}_get_info_by_fd() Replace the short informal description with the proper doc comments. Suggested-by: Andrii Nakryiko Signed-off-by: Ilya Leoshkevich Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230220234958.764997-1-iii@linux.ibm.com --- tools/lib/bpf/bpf.h | 69 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 64 insertions(+), 5 deletions(-) diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 9ed9bceb41115..f0f7863732381 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ /* - * common eBPF ELF operations. + * Common BPF ELF operations. 
* * Copyright (C) 2013-2015 Alexei Starovoitov * Copyright (C) 2015 Wang Nan @@ -386,14 +386,73 @@ LIBBPF_API int bpf_link_get_fd_by_id(__u32 id); LIBBPF_API int bpf_link_get_fd_by_id_opts(__u32 id, const struct bpf_get_fd_by_id_opts *opts); LIBBPF_API int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len); -/* Type-safe variants of bpf_obj_get_info_by_fd(). The callers still needs to - * pass info_len, which should normally be - * sizeof(struct bpf_{prog,map,btf,link}_info), in order to be compatible with - * different libbpf and kernel versions. + +/** + * @brief **bpf_prog_get_info_by_fd()** obtains information about the BPF + * program corresponding to *prog_fd*. + * + * Populates up to *info_len* bytes of *info* and updates *info_len* with the + * actual number of bytes written to *info*. + * + * @param prog_fd BPF program file descriptor + * @param info pointer to **struct bpf_prog_info** that will be populated with + * BPF program information + * @param info_len pointer to the size of *info*; on success updated with the + * number of bytes written to *info* + * @return 0, on success; negative error code, otherwise (errno is also set to + * the error code) */ LIBBPF_API int bpf_prog_get_info_by_fd(int prog_fd, struct bpf_prog_info *info, __u32 *info_len); + +/** + * @brief **bpf_map_get_info_by_fd()** obtains information about the BPF + * map corresponding to *map_fd*. + * + * Populates up to *info_len* bytes of *info* and updates *info_len* with the + * actual number of bytes written to *info*. + * + * @param map_fd BPF map file descriptor + * @param info pointer to **struct bpf_map_info** that will be populated with + * BPF map information + * @param info_len pointer to the size of *info*; on success updated with the + * number of bytes written to *info* + * @return 0, on success; negative error code, otherwise (errno is also set to + * the error code) + */ LIBBPF_API int bpf_map_get_info_by_fd(int map_fd, struct bpf_map_info *info, __u32 *info_len); + +/** + * @brief **bpf_btf_get_info_by_fd()** obtains information about the + * BTF object corresponding to *btf_fd*. + * + * Populates up to *info_len* bytes of *info* and updates *info_len* with the + * actual number of bytes written to *info*. + * + * @param btf_fd BTF object file descriptor + * @param info pointer to **struct bpf_btf_info** that will be populated with + * BTF object information + * @param info_len pointer to the size of *info*; on success updated with the + * number of bytes written to *info* + * @return 0, on success; negative error code, otherwise (errno is also set to + * the error code) + */ LIBBPF_API int bpf_btf_get_info_by_fd(int btf_fd, struct bpf_btf_info *info, __u32 *info_len); + +/** + * @brief **bpf_btf_get_info_by_fd()** obtains information about the BPF + * link corresponding to *link_fd*. + * + * Populates up to *info_len* bytes of *info* and updates *info_len* with the + * actual number of bytes written to *info*. 
+ * + * @param link_fd BPF link file descriptor + * @param info pointer to **struct bpf_link_info** that will be populated with + * BPF link information + * @param info_len pointer to the size of *info*; on success updated with the + * number of bytes written to *info* + * @return 0, on success; negative error code, otherwise (errno is also set to + * the error code) + */ LIBBPF_API int bpf_link_get_info_by_fd(int link_fd, struct bpf_link_info *info, __u32 *info_len); struct bpf_prog_query_opts { -- GitLab From c8ee37bde4021a275d2e4f33bd48d54912bb00c4 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 27 Feb 2023 14:49:43 -0800 Subject: [PATCH 0079/2078] libbpf: Fix bpf_xdp_query() in old kernels Commit 04d58f1b26a4("libbpf: add API to get XDP/XSK supported features") added feature_flags to struct bpf_xdp_query_opts. If a user uses bpf_xdp_query_opts with feature_flags member, the bpf_xdp_query() will check whether 'netdev' family exists or not in the kernel. If it does not exist, the bpf_xdp_query() will return -ENOENT. But 'netdev' family does not exist in old kernels as it is introduced in the same patch set as Commit 04d58f1b26a4. So old kernel with newer libbpf won't work properly with bpf_xdp_query() api call. To fix this issue, if the return value of libbpf_netlink_resolve_genl_family_id() is -ENOENT, bpf_xdp_query() will just return 0, skipping the rest of xdp feature query. This preserves backward compatibility. Fixes: 04d58f1b26a4 ("libbpf: add API to get XDP/XSK supported features") Signed-off-by: Yonghong Song Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230227224943.1153459-1-yhs@fb.com --- tools/lib/bpf/netlink.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 1653e7a8b0a11..84dd5fa149058 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -468,8 +468,13 @@ int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts) return 0; err = libbpf_netlink_resolve_genl_family_id("netdev", sizeof("netdev"), &id); - if (err < 0) + if (err < 0) { + if (err == -ENOENT) { + opts->feature_flags = 0; + goto skip_feature_flags; + } return libbpf_err(err); + } memset(&req, 0, sizeof(req)); req.nh.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN); @@ -489,6 +494,7 @@ int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts) opts->feature_flags = md.flags; +skip_feature_flags: return 0; } -- GitLab From bbefef2f07080cd502a93cb1c529e1c8a6c4ac8e Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Tue, 28 Feb 2023 11:33:04 +0000 Subject: [PATCH 0080/2078] bpf, mips: Implement DADDI workarounds for JIT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For DADDI errata we just workaround by disable immediate operation for BPF_ADD / BPF_SUB to avoid generation of DADDIU. All other use cases in JIT won't cause overflow thus they are all safe. 
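In practice "disable immediate operation" means valid_alu_i() now rejects BPF_ADD/BPF_SUB immediates on affected CPUs, so the JIT falls back to its existing register path instead of ever emitting DADDIU. A simplified sketch of that decision (not the literal JIT code; the helper names stand in for the real emit helpers):

    if (valid_alu_i(op, imm)) {
        /* immediate fits in 16 bits and no erratum: may emit daddiu */
        emit_alu_i64(ctx, dst, imm, op);
    } else {
        /* materialize imm in a scratch register, then use the
         * register-register form (daddu/dsubu), which is unaffected
         */
        emit_mov_i(ctx, tmp, imm);
        emit_alu_r64(ctx, dst, tmp, op);
    }
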
Signed-off-by: Jiaxun Yang Signed-off-by: Daniel Borkmann Reviewed-by: Philippe Mathieu-Daudé Acked-by: Johan Almbladh Link: https://lore.kernel.org/bpf/20230228113305.83751-2-jiaxun.yang@flygoat.com --- arch/mips/Kconfig | 1 - arch/mips/net/bpf_jit_comp.c | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 15cb692b0a097..b89c4bf2fa0ad 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -64,7 +64,6 @@ config MIPS select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE select HAVE_EBPF_JIT if !CPU_MICROMIPS && \ - !CPU_DADDI_WORKAROUNDS && \ !CPU_R4000_WORKAROUNDS && \ !CPU_R4400_WORKAROUNDS select HAVE_EXIT_THREAD diff --git a/arch/mips/net/bpf_jit_comp.c b/arch/mips/net/bpf_jit_comp.c index b17130d510d49..a40d926b65139 100644 --- a/arch/mips/net/bpf_jit_comp.c +++ b/arch/mips/net/bpf_jit_comp.c @@ -218,9 +218,13 @@ bool valid_alu_i(u8 op, s32 imm) /* All legal eBPF values are valid */ return true; case BPF_ADD: + if (IS_ENABLED(CONFIG_CPU_DADDI_WORKAROUNDS)) + return false; /* imm must be 16 bits */ return imm >= -0x8000 && imm <= 0x7fff; case BPF_SUB: + if (IS_ENABLED(CONFIG_CPU_DADDI_WORKAROUNDS)) + return false; /* -imm must be 16 bits */ return imm >= -0x7fff && imm <= 0x8000; case BPF_AND: -- GitLab From 7364d60c26618d7465602ac86717a5a325b342f3 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Tue, 28 Feb 2023 11:33:05 +0000 Subject: [PATCH 0081/2078] bpf, mips: Implement R4000 workarounds for JIT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For R4000 erratas around multiplication and division instructions, as our use of those instructions are always followed by mflo/mfhi instructions, the only issue we need care is "MIPS R4000PC/SC Errata, Processor Revision 2.2 and 3.0" Errata 28: "A double-word or a variable shift may give an incorrect result if executed while an integer multiplication is in progress." We just emit a mfhi $0 to ensure the operation is completed after every multiplication instruction according to workaround suggestion in the document. 
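Condensed from the hunk below, the emitted sequence for a 64-bit multiply with the workaround becomes the following (the comment spells out the rationale):

    emit(ctx, dmultu, dst, src);    /* start the multiplication   */
    emit(ctx, mflo, dst);           /* read back the low 64 bits  */
    /* R4000 erratum 28: a double-word or variable shift issued while an
     * integer multiply is still in progress may give a wrong result;
     * reading HI into $zero stalls until the multiply has completed.
     */
    if (IS_ENABLED(CONFIG_CPU_R4000_WORKAROUNDS))
        emit(ctx, mfhi, MIPS_R_ZERO);
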
Signed-off-by: Jiaxun Yang Signed-off-by: Daniel Borkmann Reviewed-by: Philippe Mathieu-Daudé Acked-by: Johan Almbladh Link: https://lore.kernel.org/bpf/20230228113305.83751-3-jiaxun.yang@flygoat.com --- arch/mips/Kconfig | 4 +--- arch/mips/net/bpf_jit_comp64.c | 3 +++ 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index b89c4bf2fa0ad..363cd59a3bc02 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -63,9 +63,7 @@ config MIPS select HAVE_DEBUG_STACKOVERFLOW select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE - select HAVE_EBPF_JIT if !CPU_MICROMIPS && \ - !CPU_R4000_WORKAROUNDS && \ - !CPU_R4400_WORKAROUNDS + select HAVE_EBPF_JIT if !CPU_MICROMIPS select HAVE_EXIT_THREAD select HAVE_FAST_GUP select HAVE_FTRACE_MCOUNT_RECORD diff --git a/arch/mips/net/bpf_jit_comp64.c b/arch/mips/net/bpf_jit_comp64.c index 0e7c1bdcf9148..fa7e9aa37f498 100644 --- a/arch/mips/net/bpf_jit_comp64.c +++ b/arch/mips/net/bpf_jit_comp64.c @@ -228,6 +228,9 @@ static void emit_alu_r64(struct jit_context *ctx, u8 dst, u8 src, u8 op) } else { emit(ctx, dmultu, dst, src); emit(ctx, mflo, dst); + /* Ensure multiplication is completed */ + if (IS_ENABLED(CONFIG_CPU_R4000_WORKAROUNDS)) + emit(ctx, mfhi, MIPS_R_ZERO); } break; /* dst = dst / src */ -- GitLab From 30a2d8328d8ac1bb0a6bf73f4f4cf03f4f5977cc Mon Sep 17 00:00:00 2001 From: David Vernet Date: Tue, 28 Feb 2023 09:28:45 -0600 Subject: [PATCH 0082/2078] bpf: Fix bpf_cgroup_from_id() doxygen header In commit 332ea1f697be ("bpf: Add bpf_cgroup_from_id() kfunc"), a new bpf_cgroup_from_id() kfunc was added which allows a BPF program to lookup and acquire a reference to a cgroup from a cgroup id. The commit's doxygen comment seems to have copy-pasted fields, which causes BPF kfunc helper documentation to fail to render: /helpers.c:2114: warning: Excess function parameter 'cgrp'... /helpers.c:2114: warning: Excess function parameter 'level'... /helpers.c:2114: warning: Excess function parameter 'level'... This patch fixes the doxygen header. Fixes: 332ea1f697be ("bpf: Add bpf_cgroup_from_id() kfunc") Signed-off-by: David Vernet Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20230228152845.294695-1-void@manifault.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index a784be6f8bac4..abdcc52f90a63 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2106,8 +2106,7 @@ __bpf_kfunc struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) * bpf_cgroup_from_id - Find a cgroup from its ID. A cgroup returned by this * kfunc which is not subsequently stored in a map, must be released by calling * bpf_cgroup_release(). - * @cgrp: The cgroup for which we're performing a lookup. - * @level: The level of ancestor to look up. + * @cgid: cgroup id. */ __bpf_kfunc struct cgroup *bpf_cgroup_from_id(u64 cgid) { -- GitLab From ae256f95478e07d49dae5036bb83c09dfbd686d4 Mon Sep 17 00:00:00 2001 From: "Jose E. Marchesi" Date: Tue, 28 Feb 2023 10:51:29 +0100 Subject: [PATCH 0083/2078] bpf, docs: Document BPF insn encoding in term of stored bytes [Changes from V4: - s/regs:16/regs:8 in figure.] [Changes from V3: - Back to src_reg and dst_reg, since they denote register numbers as opposed to the values stored in these registers.] [Changes from V2: - Use src and dst consistently in the document. - Use a more graphical depiction of the 128-bit instruction. - Remove `Where:' fragment. 
- Clarify that unused bits are reserved and shall be zeroed.] [Changes from V1: - Use rst literal blocks for figures. - Avoid using | in the basic instruction/pseudo instruction figure. - Rebased to today's bpf-next master branch.] This patch modifies instruction-set.rst so it documents the encoding of BPF instructions in terms of how the bytes are stored (be it in an ELF file or as bytes in a memory buffer to be loaded into the kernel or some other BPF consumer) as opposed to how the instruction looks like once loaded. This is hopefully easier to understand by implementors looking to generate and/or consume bytes conforming BPF instructions. The patch also clarifies that the unused bytes in a pseudo-instruction shall be cleared with zeros. Signed-off-by: Jose E. Marchesi Acked-by: Yonghong Song Acked-by: David Vernet Link: https://lore.kernel.org/r/87h6v6i0da.fsf_-_@oracle.com Signed-off-by: Alexei Starovoitov --- Documentation/bpf/instruction-set.rst | 46 ++++++++++++++------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/instruction-set.rst index 01802ed9b29ba..db8789e6969ea 100644 --- a/Documentation/bpf/instruction-set.rst +++ b/Documentation/bpf/instruction-set.rst @@ -38,15 +38,11 @@ eBPF has two instruction encodings: * the wide instruction encoding, which appends a second 64-bit immediate (i.e., constant) value after the basic instruction for a total of 128 bits. -The basic instruction encoding looks as follows for a little-endian processor, -where MSB and LSB mean the most significant bits and least significant bits, -respectively: +The fields conforming an encoded basic instruction are stored in the +following order:: -============= ======= ======= ======= ============ -32 bits (MSB) 16 bits 4 bits 4 bits 8 bits (LSB) -============= ======= ======= ======= ============ -imm offset src_reg dst_reg opcode -============= ======= ======= ======= ============ + opcode:8 src_reg:4 dst_reg:4 offset:16 imm:32 // In little-endian BPF. + opcode:8 dst_reg:4 src_reg:4 offset:16 imm:32 // In big-endian BPF. **imm** signed integer immediate value @@ -64,16 +60,17 @@ imm offset src_reg dst_reg opcode **opcode** operation to perform -and as follows for a big-endian processor: +Note that the contents of multi-byte fields ('imm' and 'offset') are +stored using big-endian byte ordering in big-endian BPF and +little-endian byte ordering in little-endian BPF. -============= ======= ======= ======= ============ -32 bits (MSB) 16 bits 4 bits 4 bits 8 bits (LSB) -============= ======= ======= ======= ============ -imm offset dst_reg src_reg opcode -============= ======= ======= ======= ============ +For example:: -Multi-byte fields ('imm' and 'offset') are similarly stored in -the byte order of the processor. + opcode offset imm assembly + src_reg dst_reg + 07 0 1 00 00 44 33 22 11 r1 += 0x11223344 // little + dst_reg src_reg + 07 1 0 00 00 11 22 33 44 r1 += 0x11223344 // big Note that most instructions do not use all of the fields. Unused fields shall be cleared to zero. @@ -84,18 +81,23 @@ The 64 bits following the basic instruction contain a pseudo instruction using the same format but with opcode, dst_reg, src_reg, and offset all set to zero, and imm containing the high 32 bits of the immediate value. 
-================= ================== -64 bits (MSB) 64 bits (LSB) -================= ================== -basic instruction pseudo instruction -================= ================== +This is depicted in the following figure:: + + basic_instruction + .-----------------------------. + | | + code:8 regs:8 offset:16 imm:32 unused:32 imm:32 + | | + '--------------' + pseudo instruction Thus the 64-bit immediate value is constructed as follows: imm64 = (next_imm << 32) | imm where 'next_imm' refers to the imm value of the pseudo instruction -following the basic instruction. +following the basic instruction. The unused bytes in the pseudo +instruction are reserved and shall be cleared to zero. Instruction classes ------------------- -- GitLab From 01c6c9fccbd51c1d9eab0f5794b0271b026178df Mon Sep 17 00:00:00 2001 From: Abinaya Kalaiselvan Date: Mon, 19 Dec 2022 11:08:44 +0530 Subject: [PATCH 0084/2078] wifi: ath11k: Add tx ack signal support for management packets Add support to notify tx ack signal values for management packets to userspace through nl80211 interface. Advertise NL80211_EXT_FEATURE_ACK_SIGNAL_SUPPORT flag to enable this feature and it will be used for data packets as well. Tested-on: IPQ8074 hw2.0 AHB WLAN.HK.2.7.0.1-01744-QCAHKSWPL_SILICONZ-1 Signed-off-by: Abinaya Kalaiselvan Signed-off-by: Maharaja Kennadyrajan Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20221219053844.4084486-1-quic_mkenna@quicinc.com --- drivers/net/wireless/ath/ath11k/hw.c | 1 + drivers/net/wireless/ath/ath11k/mac.c | 5 +++++ drivers/net/wireless/ath/ath11k/wmi.c | 27 ++++++++++++++++----------- drivers/net/wireless/ath/ath11k/wmi.h | 3 +++ 4 files changed, 25 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/hw.c b/drivers/net/wireless/ath/ath11k/hw.c index ab8f0ccacc6be..60ac215e06786 100644 --- a/drivers/net/wireless/ath/ath11k/hw.c +++ b/drivers/net/wireless/ath/ath11k/hw.c @@ -201,6 +201,7 @@ static void ath11k_init_wmi_config_ipq8074(struct ath11k_base *ab, config->twt_ap_pdev_count = ab->num_radios; config->twt_ap_sta_count = 1000; config->flag1 |= WMI_RSRC_CFG_FLAG1_BSS_CHANNEL_INFO_64; + config->flag1 |= WMI_RSRC_CFG_FLAG1_ACK_RSSI; } static int ath11k_hw_mac_id_to_pdev_id_ipq8074(struct ath11k_hw_params *hw, diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index fd1a23ead5e22..cad832e0e6b80 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -9174,6 +9174,11 @@ static int __ath11k_mac_register(struct ath11k *ar) goto err_free_if_combs; } + if (test_bit(WMI_TLV_SERVICE_TX_DATA_MGMT_ACK_RSSI, + ar->ab->wmi_ab.svc_map)) + wiphy_ext_feature_set(ar->hw->wiphy, + NL80211_EXT_FEATURE_ACK_SIGNAL_SUPPORT); + ar->hw->queues = ATH11K_HW_MAX_QUEUES; ar->hw->wiphy->tx_queue_len = ATH11K_QUEUE_LEN; ar->hw->offchannel_tx_hw_queue = ATH11K_HW_MAX_QUEUES - 1; diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c index 7057c7916d5dc..815f49c3b3adc 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -5229,8 +5229,8 @@ static int ath11k_pull_mgmt_rx_params_tlv(struct ath11k_base *ab, return 0; } -static int wmi_process_mgmt_tx_comp(struct ath11k *ar, u32 desc_id, - u32 status) +static int wmi_process_mgmt_tx_comp(struct ath11k *ar, + struct wmi_mgmt_tx_compl_event *tx_compl_param) { struct sk_buff *msdu; struct ieee80211_tx_info *info; @@ -5238,24 +5238,29 @@ static int wmi_process_mgmt_tx_comp(struct ath11k *ar, 
u32 desc_id, int num_mgmt; spin_lock_bh(&ar->txmgmt_idr_lock); - msdu = idr_find(&ar->txmgmt_idr, desc_id); + msdu = idr_find(&ar->txmgmt_idr, tx_compl_param->desc_id); if (!msdu) { ath11k_warn(ar->ab, "received mgmt tx compl for invalid msdu_id: %d\n", - desc_id); + tx_compl_param->desc_id); spin_unlock_bh(&ar->txmgmt_idr_lock); return -ENOENT; } - idr_remove(&ar->txmgmt_idr, desc_id); + idr_remove(&ar->txmgmt_idr, tx_compl_param->desc_id); spin_unlock_bh(&ar->txmgmt_idr_lock); skb_cb = ATH11K_SKB_CB(msdu); dma_unmap_single(ar->ab->dev, skb_cb->paddr, msdu->len, DMA_TO_DEVICE); info = IEEE80211_SKB_CB(msdu); - if ((!(info->flags & IEEE80211_TX_CTL_NO_ACK)) && !status) + if ((!(info->flags & IEEE80211_TX_CTL_NO_ACK)) && + !tx_compl_param->status) { info->flags |= IEEE80211_TX_STAT_ACK; + if (test_bit(WMI_TLV_SERVICE_TX_DATA_MGMT_ACK_RSSI, + ar->ab->wmi_ab.svc_map)) + info->status.ack_signal = tx_compl_param->ack_rssi; + } ieee80211_tx_status_irqsafe(ar->hw, msdu); @@ -5267,7 +5272,7 @@ static int wmi_process_mgmt_tx_comp(struct ath11k *ar, u32 desc_id, ath11k_dbg(ar->ab, ATH11K_DBG_WMI, "wmi mgmt tx comp pending %d desc id %d\n", - num_mgmt, desc_id); + num_mgmt, tx_compl_param->desc_id); if (!num_mgmt) wake_up(&ar->txmgmt_empty_waitq); @@ -5300,6 +5305,7 @@ static int ath11k_pull_mgmt_tx_compl_param_tlv(struct ath11k_base *ab, param->pdev_id = ev->pdev_id; param->desc_id = ev->desc_id; param->status = ev->status; + param->ack_rssi = ev->ack_rssi; kfree(tb); return 0; @@ -7070,13 +7076,12 @@ static void ath11k_mgmt_tx_compl_event(struct ath11k_base *ab, struct sk_buff *s goto exit; } - wmi_process_mgmt_tx_comp(ar, tx_compl_param.desc_id, - tx_compl_param.status); + wmi_process_mgmt_tx_comp(ar, &tx_compl_param); ath11k_dbg(ab, ATH11K_DBG_MGMT, - "mgmt tx compl ev pdev_id %d, desc_id %d, status %d", + "mgmt tx compl ev pdev_id %d, desc_id %d, status %d ack_rssi %d", tx_compl_param.pdev_id, tx_compl_param.desc_id, - tx_compl_param.status); + tx_compl_param.status, tx_compl_param.ack_rssi); exit: rcu_read_unlock(); diff --git a/drivers/net/wireless/ath/ath11k/wmi.h b/drivers/net/wireless/ath/ath11k/wmi.h index d3b74f1346f58..63dbb085949ff 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.h +++ b/drivers/net/wireless/ath/ath11k/wmi.h @@ -2311,6 +2311,7 @@ struct wmi_init_cmd { } __packed; #define WMI_RSRC_CFG_FLAG1_BSS_CHANNEL_INFO_64 BIT(5) +#define WMI_RSRC_CFG_FLAG1_ACK_RSSI BIT(18) struct wmi_resource_config { u32 tlv_header; @@ -4550,6 +4551,8 @@ struct wmi_mgmt_tx_compl_event { u32 desc_id; u32 status; u32 pdev_id; + u32 ppdu_id; + u32 ack_rssi; } __packed; struct wmi_scan_event { -- GitLab From 2f46439346700a2b41cf0fa9432f110f42fd8821 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Wed, 1 Mar 2023 07:49:44 -0800 Subject: [PATCH 0085/2078] bpf: Support "sk_buff" and "xdp_buff" as valid kfunc arg types The bpf mirror of the in-kernel sk_buff and xdp_buff data structures are __sk_buff and xdp_md. Currently, when we pass in the program ctx to a kfunc where the program ctx is a skb or xdp buffer, we reject the program if the in-kernel definition is sk_buff/xdp_buff instead of __sk_buff/xdp_md. This change allows "sk_buff <--> __sk_buff" and "xdp_buff <--> xdp_md" to be recognized as valid matches. The user program may pass in their program ctx as a __sk_buff or xdp_md, and the in-kernel definition of the kfunc may define this arg as a sk_buff or xdp_buff. 
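
To illustrate what this enables (this sketch is not part of the patch; the section name, includes and license boilerplate are assumptions, and the kfunc shown is the one introduced later in this series), the in-kernel definition of bpf_dynptr_from_skb() takes a struct sk_buff *, while the BPF program may declare it against the ctx mirror type and pass its context directly:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    /* Kernel-side definition (net/core/filter.c) is roughly:
     *     int bpf_dynptr_from_skb(struct sk_buff *skb, u64 flags,
     *                             struct bpf_dynptr_kern *ptr__uninit);
     * The BPF-side declaration may instead use the UAPI mirror type.
     */
    extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
                                   struct bpf_dynptr *ptr__uninit) __ksym;

    SEC("cgroup_skb/ingress")
    int pass_ctx_to_kfunc(struct __sk_buff *ctx)
    {
            struct bpf_dynptr ptr;

            /* ctx is an __sk_buff; the kfunc expects an sk_buff.  With
             * this change the verifier accepts the pair (likewise xdp_md
             * is accepted where xdp_buff is expected).
             */
            bpf_dynptr_from_skb(ctx, 0, &ptr);
            return 1; /* allow the packet */
    }

    char _license[] SEC("license") = "GPL";
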
Signed-off-by: Joanne Koong Link: https://lore.kernel.org/r/20230301154953.641654-2-joannelkoong@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/btf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index fa22ec79ac0e1..84cca84738739 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -5683,6 +5683,10 @@ again: * int socket_filter_bpf_prog(struct __sk_buff *skb) * { // no fields of skb are ever used } */ + if (strcmp(ctx_tname, "__sk_buff") == 0 && strcmp(tname, "sk_buff") == 0) + return ctx_type; + if (strcmp(ctx_tname, "xdp_md") == 0 && strcmp(tname, "xdp_buff") == 0) + return ctx_type; if (strcmp(ctx_tname, tname)) { /* bpf_user_pt_regs_t is a typedef, so resolve it to * underlying struct and check name again -- GitLab From 7e0dac2807e6c4ae8c56941d74971fdb0763b4f9 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Wed, 1 Mar 2023 07:49:45 -0800 Subject: [PATCH 0086/2078] bpf: Refactor process_dynptr_func This change cleans up process_dynptr_func's flow to be more intuitive and updates some comments with more context. Signed-off-by: Joanne Koong Link: https://lore.kernel.org/r/20230301154953.641654-3-joannelkoong@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 3 -- kernel/bpf/verifier.c | 62 ++++++++++++++++++------------------ 2 files changed, 31 insertions(+), 34 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index cf1bb1cf4a7b3..b26ff2a8f63bc 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -616,9 +616,6 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env, enum bpf_arg_type arg_type); int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno, u32 mem_size); -struct bpf_call_arg_meta; -int process_dynptr_func(struct bpf_verifier_env *env, int regno, - enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta); /* this lives here instead of in bpf.h because it needs to dereference tgt_prog */ static inline u64 bpf_trampoline_compute_key(const struct bpf_prog *tgt_prog, diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5cb8b623f6397..e0e00509846b6 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -959,39 +959,49 @@ static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env, return 0; } -static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg, - int spi) +static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg) { + int spi; + if (reg->type == CONST_PTR_TO_DYNPTR) return false; - /* For -ERANGE (i.e. spi not falling into allocated stack slots), we - * will do check_mem_access to check and update stack bounds later, so - * return true for that case. + spi = dynptr_get_spi(env, reg); + + /* -ERANGE (i.e. spi not falling into allocated stack slots) isn't an + * error because this just means the stack state hasn't been updated yet. + * We will do check_mem_access to check and update stack bounds later. */ - if (spi < 0) - return spi == -ERANGE; - /* We allow overwriting existing unreferenced STACK_DYNPTR slots, see - * mark_stack_slots_dynptr which calls destroy_if_dynptr_stack_slot to - * ensure dynptr objects at the slots we are touching are completely - * destructed before we reinitialize them for a new one. 
For referenced - * ones, destroy_if_dynptr_stack_slot returns an error early instead of - * delaying it until the end where the user will get "Unreleased + if (spi < 0 && spi != -ERANGE) + return false; + + /* We don't need to check if the stack slots are marked by previous + * dynptr initializations because we allow overwriting existing unreferenced + * STACK_DYNPTR slots, see mark_stack_slots_dynptr which calls + * destroy_if_dynptr_stack_slot to ensure dynptr objects at the slots we are + * touching are completely destructed before we reinitialize them for a new + * one. For referenced ones, destroy_if_dynptr_stack_slot returns an error early + * instead of delaying it until the end where the user will get "Unreleased * reference" error. */ return true; } -static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg, - int spi) +static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg) { struct bpf_func_state *state = func(env, reg); - int i; + int i, spi; - /* This already represents first slot of initialized bpf_dynptr */ + /* This already represents first slot of initialized bpf_dynptr. + * + * CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to + * check_func_arg_reg_off's logic, so we don't need to check its + * offset and alignment. + */ if (reg->type == CONST_PTR_TO_DYNPTR) return true; + spi = dynptr_get_spi(env, reg); if (spi < 0) return false; if (!state->stack[spi].spilled_ptr.dynptr.first_slot) @@ -6215,11 +6225,10 @@ static int process_kptr_func(struct bpf_verifier_env *env, int regno, * Helpers which do not mutate the bpf_dynptr set MEM_RDONLY in their argument * type, and declare it as 'const struct bpf_dynptr *' in their prototype. */ -int process_dynptr_func(struct bpf_verifier_env *env, int regno, - enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta) +static int process_dynptr_func(struct bpf_verifier_env *env, int regno, + enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta) { struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; - int spi = 0; /* MEM_UNINIT and MEM_RDONLY are exclusive, when applied to an * ARG_PTR_TO_DYNPTR (or ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_*): @@ -6228,15 +6237,6 @@ int process_dynptr_func(struct bpf_verifier_env *env, int regno, verbose(env, "verifier internal error: misconfigured dynptr helper type flags\n"); return -EFAULT; } - /* CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to - * check_func_arg_reg_off's logic. We only need to check offset - * and its alignment for PTR_TO_STACK. - */ - if (reg->type == PTR_TO_STACK) { - spi = dynptr_get_spi(env, reg); - if (spi < 0 && spi != -ERANGE) - return spi; - } /* MEM_UNINIT - Points to memory that is an appropriate candidate for * constructing a mutable bpf_dynptr object. @@ -6254,7 +6254,7 @@ int process_dynptr_func(struct bpf_verifier_env *env, int regno, * to. 
*/ if (arg_type & MEM_UNINIT) { - if (!is_dynptr_reg_valid_uninit(env, reg, spi)) { + if (!is_dynptr_reg_valid_uninit(env, reg)) { verbose(env, "Dynptr has to be an uninitialized dynptr\n"); return -EINVAL; } @@ -6277,7 +6277,7 @@ int process_dynptr_func(struct bpf_verifier_env *env, int regno, return -EINVAL; } - if (!is_dynptr_reg_valid_init(env, reg, spi)) { + if (!is_dynptr_reg_valid_init(env, reg)) { verbose(env, "Expected an initialized dynptr as arg #%d\n", regno); -- GitLab From 1d18feb2c915c5ad0a9a61d04b8560e8efb78ce8 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Wed, 1 Mar 2023 07:49:46 -0800 Subject: [PATCH 0087/2078] bpf: Allow initializing dynptrs in kfuncs This change allows kfuncs to take in an uninitialized dynptr as a parameter. Before this change, only helper functions could successfully use uninitialized dynptrs. This change moves the memory access check (including stack state growing and slot marking) into process_dynptr_func(), which both helpers and kfuncs call into. Signed-off-by: Joanne Koong Link: https://lore.kernel.org/r/20230301154953.641654-4-joannelkoong@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 67 ++++++++++++++----------------------------- 1 file changed, 22 insertions(+), 45 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e0e00509846b6..82e39fc5ed05d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -268,7 +268,6 @@ struct bpf_call_arg_meta { u32 ret_btf_id; u32 subprogno; struct btf_field *kptr_field; - u8 uninit_dynptr_regno; }; struct btf *btf_vmlinux; @@ -6225,10 +6224,11 @@ static int process_kptr_func(struct bpf_verifier_env *env, int regno, * Helpers which do not mutate the bpf_dynptr set MEM_RDONLY in their argument * type, and declare it as 'const struct bpf_dynptr *' in their prototype. */ -static int process_dynptr_func(struct bpf_verifier_env *env, int regno, - enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta) +static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn_idx, + enum bpf_arg_type arg_type) { struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; + int err; /* MEM_UNINIT and MEM_RDONLY are exclusive, when applied to an * ARG_PTR_TO_DYNPTR (or ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_*): @@ -6254,23 +6254,23 @@ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, * to. */ if (arg_type & MEM_UNINIT) { + int i; + if (!is_dynptr_reg_valid_uninit(env, reg)) { verbose(env, "Dynptr has to be an uninitialized dynptr\n"); return -EINVAL; } - /* We only support one dynptr being uninitialized at the moment, - * which is sufficient for the helper functions we have right now. 
- */ - if (meta->uninit_dynptr_regno) { - verbose(env, "verifier internal error: multiple uninitialized dynptr args\n"); - return -EFAULT; + /* we write BPF_DW bits (8 bytes) at a time */ + for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) { + err = check_mem_access(env, insn_idx, regno, + i, BPF_DW, BPF_WRITE, -1, false); + if (err) + return err; } - meta->uninit_dynptr_regno = regno; + err = mark_stack_slots_dynptr(env, reg, arg_type, insn_idx); } else /* MEM_RDONLY and None case from above */ { - int err; - /* For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const */ if (reg->type == CONST_PTR_TO_DYNPTR && !(arg_type & MEM_RDONLY)) { verbose(env, "cannot pass pointer to const bpf_dynptr, the helper mutates it\n"); @@ -6306,10 +6306,8 @@ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, } err = mark_dynptr_read(env, reg); - if (err) - return err; } - return 0; + return err; } static bool arg_type_is_mem_size(enum bpf_arg_type type) @@ -6719,7 +6717,8 @@ static int dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state static int check_func_arg(struct bpf_verifier_env *env, u32 arg, struct bpf_call_arg_meta *meta, - const struct bpf_func_proto *fn) + const struct bpf_func_proto *fn, + int insn_idx) { u32 regno = BPF_REG_1 + arg; struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; @@ -6932,7 +6931,7 @@ skip_type_check: err = check_mem_size_reg(env, reg, regno, true, meta); break; case ARG_PTR_TO_DYNPTR: - err = process_dynptr_func(env, regno, arg_type, meta); + err = process_dynptr_func(env, regno, insn_idx, arg_type); if (err) return err; break; @@ -8218,7 +8217,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn meta.func_id = func_id; /* check args */ for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) { - err = check_func_arg(env, i, &meta, fn); + err = check_func_arg(env, i, &meta, fn, insn_idx); if (err) return err; } @@ -8243,30 +8242,6 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn regs = cur_regs(env); - /* This can only be set for PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot - * be reinitialized by any dynptr helper. Hence, mark_stack_slots_dynptr - * is safe to do directly. 
- */ - if (meta.uninit_dynptr_regno) { - if (regs[meta.uninit_dynptr_regno].type == CONST_PTR_TO_DYNPTR) { - verbose(env, "verifier internal error: CONST_PTR_TO_DYNPTR cannot be initialized\n"); - return -EFAULT; - } - /* we write BPF_DW bits (8 bytes) at a time */ - for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) { - err = check_mem_access(env, insn_idx, meta.uninit_dynptr_regno, - i, BPF_DW, BPF_WRITE, -1, false); - if (err) - return err; - } - - err = mark_stack_slots_dynptr(env, ®s[meta.uninit_dynptr_regno], - fn->arg_type[meta.uninit_dynptr_regno - BPF_REG_1], - insn_idx); - if (err) - return err; - } - if (meta.release_regno) { err = -EINVAL; /* This can only be set for PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot @@ -9475,7 +9450,8 @@ static int process_kf_arg_ptr_to_rbtree_node(struct bpf_verifier_env *env, &meta->arg_rbtree_root.field); } -static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta) +static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta, + int insn_idx) { const char *func_name = meta->func_name, *ref_tname; const struct btf *btf = meta->btf; @@ -9672,7 +9648,8 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ return -EINVAL; } - ret = process_dynptr_func(env, regno, ARG_PTR_TO_DYNPTR | MEM_RDONLY, NULL); + ret = process_dynptr_func(env, regno, insn_idx, + ARG_PTR_TO_DYNPTR | MEM_RDONLY); if (ret < 0) return ret; break; @@ -9880,7 +9857,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, } /* Check the arguments */ - err = check_kfunc_args(env, &meta); + err = check_kfunc_args(env, &meta, insn_idx); if (err < 0) return err; /* In case of release function, we get register number of refcounted -- GitLab From 8357b366cbb09b17c90e2cd758360a6bd2ea7507 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Wed, 1 Mar 2023 07:49:47 -0800 Subject: [PATCH 0088/2078] bpf: Define no-ops for externally called bpf dynptr functions Some bpf dynptr functions will be called from places where if CONFIG_BPF_SYSCALL is not set, then the dynptr function is undefined. For example, when skb type dynptrs are added in the next commit, dynptr functions are called from net/core/filter.c This patch defines no-op implementations of these dynptr functions so that they do not break compilation by being an undefined reference. Signed-off-by: Joanne Koong Link: https://lore.kernel.org/r/20230301154953.641654-5-joannelkoong@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 75 +++++++++++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 30 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 520b238abd5a2..296841a317490 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1124,6 +1124,33 @@ static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func( return bpf_func(ctx, insnsi); } +/* the implementation of the opaque uapi struct bpf_dynptr */ +struct bpf_dynptr_kern { + void *data; + /* Size represents the number of usable bytes of dynptr data. + * If for example the offset is at 4 for a local dynptr whose data is + * of type u64, the number of usable bytes is 4. + * + * The upper 8 bits are reserved. 
It is as follows: + * Bits 0 - 23 = size + * Bits 24 - 30 = dynptr type + * Bit 31 = whether dynptr is read-only + */ + u32 size; + u32 offset; +} __aligned(8); + +enum bpf_dynptr_type { + BPF_DYNPTR_TYPE_INVALID, + /* Points to memory that is local to the bpf program */ + BPF_DYNPTR_TYPE_LOCAL, + /* Underlying data is a ringbuf record */ + BPF_DYNPTR_TYPE_RINGBUF, +}; + +int bpf_dynptr_check_size(u32 size); +u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr); + #ifdef CONFIG_BPF_JIT int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); @@ -2266,6 +2293,11 @@ static inline bool has_current_bpf_ctx(void) } void notrace bpf_prog_inc_misses_counter(struct bpf_prog *prog); + +void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, + enum bpf_dynptr_type type, u32 offset, u32 size); +void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr); +void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr); #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -2495,6 +2527,19 @@ static inline void bpf_prog_inc_misses_counter(struct bpf_prog *prog) static inline void bpf_cgrp_storage_free(struct cgroup *cgroup) { } + +static inline void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, + enum bpf_dynptr_type type, u32 offset, u32 size) +{ +} + +static inline void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr) +{ +} + +static inline void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr) +{ +} #endif /* CONFIG_BPF_SYSCALL */ void __bpf_free_used_btfs(struct bpf_prog_aux *aux, @@ -2913,36 +2958,6 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, u32 num_args, struct bpf_bprintf_data *data); void bpf_bprintf_cleanup(struct bpf_bprintf_data *data); -/* the implementation of the opaque uapi struct bpf_dynptr */ -struct bpf_dynptr_kern { - void *data; - /* Size represents the number of usable bytes of dynptr data. - * If for example the offset is at 4 for a local dynptr whose data is - * of type u64, the number of usable bytes is 4. - * - * The upper 8 bits are reserved. It is as follows: - * Bits 0 - 23 = size - * Bits 24 - 30 = dynptr type - * Bit 31 = whether dynptr is read-only - */ - u32 size; - u32 offset; -} __aligned(8); - -enum bpf_dynptr_type { - BPF_DYNPTR_TYPE_INVALID, - /* Points to memory that is local to the bpf program */ - BPF_DYNPTR_TYPE_LOCAL, - /* Underlying data is a kernel-produced ringbuf record */ - BPF_DYNPTR_TYPE_RINGBUF, -}; - -void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, - enum bpf_dynptr_type type, u32 offset, u32 size); -void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr); -int bpf_dynptr_check_size(u32 size); -u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr); - #ifdef CONFIG_BPF_LSM void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype); void bpf_cgroup_atype_put(int cgroup_atype); -- GitLab From 485ec51ef9764c0f67d35cabba0a963936b9126e Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Wed, 1 Mar 2023 07:49:48 -0800 Subject: [PATCH 0089/2078] bpf: Refactor verifier dynptr into get_dynptr_arg_reg This commit refactors the logic for determining which register in a function is the dynptr into "get_dynptr_arg_reg". This will be used in the future when the dynptr reg for BPF_FUNC_dynptr_write will need to be obtained in order to support writes for skb dynptrs. 
Signed-off-by: Joanne Koong Link: https://lore.kernel.org/r/20230301154953.641654-6-joannelkoong@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 80 +++++++++++++++++++++++++++---------------- 1 file changed, 50 insertions(+), 30 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 82e39fc5ed05d..8fd2f26a89774 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6689,6 +6689,28 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env, } } +static struct bpf_reg_state *get_dynptr_arg_reg(struct bpf_verifier_env *env, + const struct bpf_func_proto *fn, + struct bpf_reg_state *regs) +{ + struct bpf_reg_state *state = NULL; + int i; + + for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) + if (arg_type_is_dynptr(fn->arg_type[i])) { + if (state) { + verbose(env, "verifier internal error: multiple dynptr args\n"); + return NULL; + } + state = ®s[BPF_REG_1 + i]; + } + + if (!state) + verbose(env, "verifier internal error: no dynptr arg found\n"); + + return state; +} + static int dynptr_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg) { struct bpf_func_state *state = func(env, reg); @@ -8326,43 +8348,41 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn } break; case BPF_FUNC_dynptr_data: - for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) { - if (arg_type_is_dynptr(fn->arg_type[i])) { - struct bpf_reg_state *reg = ®s[BPF_REG_1 + i]; - int id, ref_obj_id; - - if (meta.dynptr_id) { - verbose(env, "verifier internal error: meta.dynptr_id already set\n"); - return -EFAULT; - } - - if (meta.ref_obj_id) { - verbose(env, "verifier internal error: meta.ref_obj_id already set\n"); - return -EFAULT; - } + { + struct bpf_reg_state *reg; + int id, ref_obj_id; - id = dynptr_id(env, reg); - if (id < 0) { - verbose(env, "verifier internal error: failed to obtain dynptr id\n"); - return id; - } + reg = get_dynptr_arg_reg(env, fn, regs); + if (!reg) + return -EFAULT; - ref_obj_id = dynptr_ref_obj_id(env, reg); - if (ref_obj_id < 0) { - verbose(env, "verifier internal error: failed to obtain dynptr ref_obj_id\n"); - return ref_obj_id; - } - meta.dynptr_id = id; - meta.ref_obj_id = ref_obj_id; - break; - } + if (meta.dynptr_id) { + verbose(env, "verifier internal error: meta.dynptr_id already set\n"); + return -EFAULT; } - if (i == MAX_BPF_FUNC_REG_ARGS) { - verbose(env, "verifier internal error: no dynptr in bpf_dynptr_data()\n"); + if (meta.ref_obj_id) { + verbose(env, "verifier internal error: meta.ref_obj_id already set\n"); return -EFAULT; } + + id = dynptr_id(env, reg); + if (id < 0) { + verbose(env, "verifier internal error: failed to obtain dynptr id\n"); + return id; + } + + ref_obj_id = dynptr_ref_obj_id(env, reg); + if (ref_obj_id < 0) { + verbose(env, "verifier internal error: failed to obtain dynptr ref_obj_id\n"); + return ref_obj_id; + } + + meta.dynptr_id = id; + meta.ref_obj_id = ref_obj_id; + break; + } case BPF_FUNC_user_ringbuf_drain: err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, set_user_ringbuf_callback_state); -- GitLab From d96d937d7c5c12237dce1f14bf0fc9900cabba09 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Wed, 1 Mar 2023 07:49:49 -0800 Subject: [PATCH 0090/2078] bpf: Add __uninit kfunc annotation This patch adds __uninit as a kfunc annotation. This will be useful for scenarios such as for example in dynptrs, indicating whether the dynptr should be checked by the verifier as an initialized or an uninitialized dynptr. 
Without this annotation, the alternative would be needing to hard-code in the verifier the specific kfunc to indicate that arg should be treated as an uninitialized arg. Signed-off-by: Joanne Koong Link: https://lore.kernel.org/r/20230301154953.641654-7-joannelkoong@gmail.com Signed-off-by: Alexei Starovoitov --- Documentation/bpf/kfuncs.rst | 17 +++++++++++++++++ kernel/bpf/verifier.c | 18 ++++++++++++++++-- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst index 226313747be56..9a78533d25ac7 100644 --- a/Documentation/bpf/kfuncs.rst +++ b/Documentation/bpf/kfuncs.rst @@ -100,6 +100,23 @@ Hence, whenever a constant scalar argument is accepted by a kfunc which is not a size parameter, and the value of the constant matters for program safety, __k suffix should be used. +2.2.2 __uninit Annotation +-------------------- + +This annotation is used to indicate that the argument will be treated as +uninitialized. + +An example is given below:: + + __bpf_kfunc int bpf_dynptr_from_skb(..., struct bpf_dynptr_kern *ptr__uninit) + { + ... + } + +Here, the dynptr will be treated as an uninitialized dynptr. Without this +annotation, the verifier will reject the program if the dynptr passed in is +not initialized. + .. _BPF_kfunc_nodef: 2.3 Using an existing kernel function diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8fd2f26a89774..d052aa5800de8 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -8727,6 +8727,11 @@ static bool is_kfunc_arg_alloc_obj(const struct btf *btf, const struct btf_param return __kfunc_param_match_suffix(btf, arg, "__alloc"); } +static bool is_kfunc_arg_uninit(const struct btf *btf, const struct btf_param *arg) +{ + return __kfunc_param_match_suffix(btf, arg, "__uninit"); +} + static bool is_kfunc_arg_scalar_with_name(const struct btf *btf, const struct btf_param *arg, const char *name) @@ -9662,17 +9667,26 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ return ret; break; case KF_ARG_PTR_TO_DYNPTR: + { + enum bpf_arg_type dynptr_arg_type = ARG_PTR_TO_DYNPTR; + if (reg->type != PTR_TO_STACK && reg->type != CONST_PTR_TO_DYNPTR) { verbose(env, "arg#%d expected pointer to stack or dynptr_ptr\n", i); return -EINVAL; } - ret = process_dynptr_func(env, regno, insn_idx, - ARG_PTR_TO_DYNPTR | MEM_RDONLY); + if (reg->type == CONST_PTR_TO_DYNPTR) + dynptr_arg_type |= MEM_RDONLY; + + if (is_kfunc_arg_uninit(btf, &args[i])) + dynptr_arg_type |= MEM_UNINIT; + + ret = process_dynptr_func(env, regno, insn_idx, dynptr_arg_type); if (ret < 0) return ret; break; + } case KF_ARG_PTR_TO_LIST_HEAD: if (reg->type != PTR_TO_MAP_VALUE && reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { -- GitLab From b5964b968ac64c2ec2debee7518499113b27c34e Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Wed, 1 Mar 2023 07:49:50 -0800 Subject: [PATCH 0091/2078] bpf: Add skb dynptrs Add skb dynptrs, which are dynptrs whose underlying pointer points to a skb. The dynptr acts on skb data. skb dynptrs have two main benefits. One is that they allow operations on sizes that are not statically known at compile-time (eg variable-sized accesses). Another is that parsing the packet data through dynptrs (instead of through direct access of skb->data and skb->data_end) can be more ergonomic and less brittle (eg does not need manual if checking for being within bounds of data_end). 
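
A hedged sketch of the parsing style described above (not taken from the patch or its selftests; the program section, includes and policy are assumptions): headers are read through the dynptr rather than via manual comparisons against skb->data_end, and the same reads work for non-linear data.

    #include <linux/bpf.h>
    #include <linux/pkt_cls.h>
    #include <linux/if_ether.h>
    #include <linux/ip.h>
    #include <linux/in.h>
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_endian.h>

    extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
                                   struct bpf_dynptr *ptr__uninit) __ksym;

    SEC("tc")
    int parse_with_dynptr(struct __sk_buff *ctx)
    {
            struct bpf_dynptr ptr;
            struct ethhdr eth;
            struct iphdr iph;

            if (bpf_dynptr_from_skb(ctx, 0, &ptr))
                    return TC_ACT_OK;

            /* Bounds are enforced by the dynptr itself: no manual checks
             * against skb->data_end are needed.
             */
            if (bpf_dynptr_read(&eth, sizeof(eth), &ptr, 0, 0))
                    return TC_ACT_OK;
            if (eth.h_proto != bpf_htons(ETH_P_IP))
                    return TC_ACT_OK;
            if (bpf_dynptr_read(&iph, sizeof(iph), &ptr, sizeof(eth), 0))
                    return TC_ACT_OK;

            /* Example policy: keep TCP, drop other IPv4 traffic. */
            return iph.protocol == IPPROTO_TCP ? TC_ACT_OK : TC_ACT_SHOT;
    }

    char _license[] SEC("license") = "GPL";
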
For bpf prog types that don't support writes on skb data, the dynptr is read-only (bpf_dynptr_write() will return an error) For reads and writes through the bpf_dynptr_read() and bpf_dynptr_write() interfaces, reading and writing from/to data in the head as well as from/to non-linear paged buffers is supported. Data slices through the bpf_dynptr_data API are not supported; instead bpf_dynptr_slice() and bpf_dynptr_slice_rdwr() (added in subsequent commit) should be used. For examples of how skb dynptrs can be used, please see the attached selftests. Signed-off-by: Joanne Koong Link: https://lore.kernel.org/r/20230301154953.641654-8-joannelkoong@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 14 ++++++- include/linux/filter.h | 18 ++++++++ include/uapi/linux/bpf.h | 13 +++++- kernel/bpf/btf.c | 18 ++++++++ kernel/bpf/helpers.c | 76 +++++++++++++++++++++++++++------- kernel/bpf/verifier.c | 61 +++++++++++++++++++++++++++ net/core/filter.c | 67 ++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 13 +++++- 8 files changed, 261 insertions(+), 19 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 296841a317490..e7436d7615b0d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -607,11 +607,14 @@ enum bpf_type_flag { */ NON_OWN_REF = BIT(14 + BPF_BASE_TYPE_BITS), + /* DYNPTR points to sk_buff */ + DYNPTR_TYPE_SKB = BIT(15 + BPF_BASE_TYPE_BITS), + __BPF_TYPE_FLAG_MAX, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; -#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF) +#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF | DYNPTR_TYPE_SKB) /* Max number of base types. */ #define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS) @@ -1146,6 +1149,8 @@ enum bpf_dynptr_type { BPF_DYNPTR_TYPE_LOCAL, /* Underlying data is a ringbuf record */ BPF_DYNPTR_TYPE_RINGBUF, + /* Underlying data is a sk_buff */ + BPF_DYNPTR_TYPE_SKB, }; int bpf_dynptr_check_size(u32 size); @@ -2846,6 +2851,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, struct bpf_insn *insn_buf, struct bpf_prog *prog, u32 *target_size); +int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, + struct bpf_dynptr_kern *ptr); #else static inline bool bpf_sock_common_is_valid_access(int off, int size, enum bpf_access_type type, @@ -2867,6 +2874,11 @@ static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, { return 0; } +static inline int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, + struct bpf_dynptr_kern *ptr) +{ + return -EOPNOTSUPP; +} #endif #ifdef CONFIG_INET diff --git a/include/linux/filter.h b/include/linux/filter.h index 1727898f16413..de18e844d15ed 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1542,4 +1542,22 @@ static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u64 index return XDP_REDIRECT; } +#ifdef CONFIG_NET +int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len); +int __bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, + u32 len, u64 flags); +#else /* CONFIG_NET */ +static inline int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, + void *to, u32 len) +{ + return -EOPNOTSUPP; +} + +static inline int __bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, + const void *from, u32 len, u64 flags) +{ + return -EOPNOTSUPP; +} +#endif /* CONFIG_NET */ + #endif /* __LINUX_FILTER_H__ */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 
62ce1f5d1b1d9..d0351d30e5515 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5325,11 +5325,17 @@ union bpf_attr { * Description * Write *len* bytes from *src* into *dst*, starting from *offset* * into *dst*. - * *flags* is currently unused. + * + * *flags* must be 0 except for skb-type dynptrs. + * + * For skb-type dynptrs: + * * For *flags*, please see the flags accepted by + * **bpf_skb_store_bytes**\ (). * Return * 0 on success, -E2BIG if *offset* + *len* exceeds the length * of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst* - * is a read-only dynptr or if *flags* is not 0. + * is a read-only dynptr or if *flags* is not correct. For skb-type dynptrs, + * other errors correspond to errors returned by **bpf_skb_store_bytes**\ (). * * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len) * Description @@ -5337,6 +5343,9 @@ union bpf_attr { * * *len* must be a statically known value. The returned data slice * is invalidated whenever the dynptr is invalidated. + * + * skb type dynptrs may not use bpf_dynptr_data. They should + * instead use bpf_dynptr_slice and bpf_dynptr_slice_rdwr. * Return * Pointer to the underlying dynptr data, NULL if the dynptr is * read-only, if the dynptr is invalid, or if the offset and length diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 84cca84738739..ef2d8969ed1f7 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -207,6 +207,11 @@ enum btf_kfunc_hook { BTF_KFUNC_HOOK_TRACING, BTF_KFUNC_HOOK_SYSCALL, BTF_KFUNC_HOOK_FMODRET, + BTF_KFUNC_HOOK_CGROUP_SKB, + BTF_KFUNC_HOOK_SCHED_ACT, + BTF_KFUNC_HOOK_SK_SKB, + BTF_KFUNC_HOOK_SOCKET_FILTER, + BTF_KFUNC_HOOK_LWT, BTF_KFUNC_HOOK_MAX, }; @@ -7708,6 +7713,19 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type) return BTF_KFUNC_HOOK_TRACING; case BPF_PROG_TYPE_SYSCALL: return BTF_KFUNC_HOOK_SYSCALL; + case BPF_PROG_TYPE_CGROUP_SKB: + return BTF_KFUNC_HOOK_CGROUP_SKB; + case BPF_PROG_TYPE_SCHED_ACT: + return BTF_KFUNC_HOOK_SCHED_ACT; + case BPF_PROG_TYPE_SK_SKB: + return BTF_KFUNC_HOOK_SK_SKB; + case BPF_PROG_TYPE_SOCKET_FILTER: + return BTF_KFUNC_HOOK_SOCKET_FILTER; + case BPF_PROG_TYPE_LWT_OUT: + case BPF_PROG_TYPE_LWT_IN: + case BPF_PROG_TYPE_LWT_XMIT: + case BPF_PROG_TYPE_LWT_SEG6LOCAL: + return BTF_KFUNC_HOOK_LWT; default: return BTF_KFUNC_HOOK_MAX; } diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index abdcc52f90a63..e8e2414d1587c 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1420,11 +1420,21 @@ static bool bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr) return ptr->size & DYNPTR_RDONLY_BIT; } +void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr) +{ + ptr->size |= DYNPTR_RDONLY_BIT; +} + static void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_type type) { ptr->size |= type << DYNPTR_TYPE_SHIFT; } +static enum bpf_dynptr_type bpf_dynptr_get_type(const struct bpf_dynptr_kern *ptr) +{ + return (ptr->size & ~(DYNPTR_RDONLY_BIT)) >> DYNPTR_TYPE_SHIFT; +} + u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr) { return ptr->size & DYNPTR_SIZE_MASK; @@ -1497,6 +1507,7 @@ static const struct bpf_func_proto bpf_dynptr_from_mem_proto = { BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern *, src, u32, offset, u64, flags) { + enum bpf_dynptr_type type; int err; if (!src->data || flags) @@ -1506,13 +1517,23 @@ BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern if (err) return err; - /* Source and destination may 
possibly overlap, hence use memmove to - * copy the data. E.g. bpf_dynptr_from_mem may create two dynptr - * pointing to overlapping PTR_TO_MAP_VALUE regions. - */ - memmove(dst, src->data + src->offset + offset, len); + type = bpf_dynptr_get_type(src); - return 0; + switch (type) { + case BPF_DYNPTR_TYPE_LOCAL: + case BPF_DYNPTR_TYPE_RINGBUF: + /* Source and destination may possibly overlap, hence use memmove to + * copy the data. E.g. bpf_dynptr_from_mem may create two dynptr + * pointing to overlapping PTR_TO_MAP_VALUE regions. + */ + memmove(dst, src->data + src->offset + offset, len); + return 0; + case BPF_DYNPTR_TYPE_SKB: + return __bpf_skb_load_bytes(src->data, src->offset + offset, dst, len); + default: + WARN_ONCE(true, "bpf_dynptr_read: unknown dynptr type %d\n", type); + return -EFAULT; + } } static const struct bpf_func_proto bpf_dynptr_read_proto = { @@ -1529,22 +1550,36 @@ static const struct bpf_func_proto bpf_dynptr_read_proto = { BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u32, offset, void *, src, u32, len, u64, flags) { + enum bpf_dynptr_type type; int err; - if (!dst->data || flags || bpf_dynptr_is_rdonly(dst)) + if (!dst->data || bpf_dynptr_is_rdonly(dst)) return -EINVAL; err = bpf_dynptr_check_off_len(dst, offset, len); if (err) return err; - /* Source and destination may possibly overlap, hence use memmove to - * copy the data. E.g. bpf_dynptr_from_mem may create two dynptr - * pointing to overlapping PTR_TO_MAP_VALUE regions. - */ - memmove(dst->data + dst->offset + offset, src, len); + type = bpf_dynptr_get_type(dst); - return 0; + switch (type) { + case BPF_DYNPTR_TYPE_LOCAL: + case BPF_DYNPTR_TYPE_RINGBUF: + if (flags) + return -EINVAL; + /* Source and destination may possibly overlap, hence use memmove to + * copy the data. E.g. bpf_dynptr_from_mem may create two dynptr + * pointing to overlapping PTR_TO_MAP_VALUE regions. 
+ */ + memmove(dst->data + dst->offset + offset, src, len); + return 0; + case BPF_DYNPTR_TYPE_SKB: + return __bpf_skb_store_bytes(dst->data, dst->offset + offset, src, len, + flags); + default: + WARN_ONCE(true, "bpf_dynptr_write: unknown dynptr type %d\n", type); + return -EFAULT; + } } static const struct bpf_func_proto bpf_dynptr_write_proto = { @@ -1560,6 +1595,7 @@ static const struct bpf_func_proto bpf_dynptr_write_proto = { BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u32, len) { + enum bpf_dynptr_type type; int err; if (!ptr->data) @@ -1572,7 +1608,19 @@ BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u3 if (bpf_dynptr_is_rdonly(ptr)) return 0; - return (unsigned long)(ptr->data + ptr->offset + offset); + type = bpf_dynptr_get_type(ptr); + + switch (type) { + case BPF_DYNPTR_TYPE_LOCAL: + case BPF_DYNPTR_TYPE_RINGBUF: + return (unsigned long)(ptr->data + ptr->offset + offset); + case BPF_DYNPTR_TYPE_SKB: + /* skb dynptrs should use bpf_dynptr_slice / bpf_dynptr_slice_rdwr */ + return 0; + default: + WARN_ONCE(true, "bpf_dynptr_data: unknown dynptr type %d\n", type); + return 0; + } } static const struct bpf_func_proto bpf_dynptr_data_proto = { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d052aa5800de8..4f5fce16543ba 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -750,6 +750,8 @@ static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type) return BPF_DYNPTR_TYPE_LOCAL; case DYNPTR_TYPE_RINGBUF: return BPF_DYNPTR_TYPE_RINGBUF; + case DYNPTR_TYPE_SKB: + return BPF_DYNPTR_TYPE_SKB; default: return BPF_DYNPTR_TYPE_INVALID; } @@ -6295,6 +6297,9 @@ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn case DYNPTR_TYPE_RINGBUF: err_extra = "ringbuf"; break; + case DYNPTR_TYPE_SKB: + err_extra = "skb "; + break; default: err_extra = ""; break; @@ -6737,6 +6742,24 @@ static int dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state return state->stack[spi].spilled_ptr.ref_obj_id; } +static enum bpf_dynptr_type dynptr_get_type(struct bpf_verifier_env *env, + struct bpf_reg_state *reg) +{ + struct bpf_func_state *state = func(env, reg); + int spi; + + if (reg->type == CONST_PTR_TO_DYNPTR) + return reg->dynptr.type; + + spi = __get_spi(reg->off); + if (spi < 0) { + verbose(env, "verifier internal error: invalid spi when querying dynptr type\n"); + return BPF_DYNPTR_TYPE_INVALID; + } + + return state->stack[spi].spilled_ptr.dynptr.type; +} + static int check_func_arg(struct bpf_verifier_env *env, u32 arg, struct bpf_call_arg_meta *meta, const struct bpf_func_proto *fn, @@ -8383,6 +8406,27 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn break; } + case BPF_FUNC_dynptr_write: + { + enum bpf_dynptr_type dynptr_type; + struct bpf_reg_state *reg; + + reg = get_dynptr_arg_reg(env, fn, regs); + if (!reg) + return -EFAULT; + + dynptr_type = dynptr_get_type(env, reg); + if (dynptr_type == BPF_DYNPTR_TYPE_INVALID) + return -EFAULT; + + if (dynptr_type == BPF_DYNPTR_TYPE_SKB) + /* this will trigger clear_all_pkt_pointers(), which will + * invalidate all dynptr slices associated with the skb + */ + changes_data = true; + + break; + } case BPF_FUNC_user_ringbuf_drain: err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, set_user_ringbuf_callback_state); @@ -8898,6 +8942,7 @@ enum special_kfunc_type { KF_bpf_rbtree_remove, KF_bpf_rbtree_add, KF_bpf_rbtree_first, + KF_bpf_dynptr_from_skb, }; 
BTF_SET_START(special_kfunc_set) @@ -8912,6 +8957,7 @@ BTF_ID(func, bpf_rdonly_cast) BTF_ID(func, bpf_rbtree_remove) BTF_ID(func, bpf_rbtree_add) BTF_ID(func, bpf_rbtree_first) +BTF_ID(func, bpf_dynptr_from_skb) BTF_SET_END(special_kfunc_set) BTF_ID_LIST(special_kfunc_list) @@ -8928,6 +8974,7 @@ BTF_ID(func, bpf_rcu_read_unlock) BTF_ID(func, bpf_rbtree_remove) BTF_ID(func, bpf_rbtree_add) BTF_ID(func, bpf_rbtree_first) +BTF_ID(func, bpf_dynptr_from_skb) static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta) { @@ -9682,6 +9729,9 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ if (is_kfunc_arg_uninit(btf, &args[i])) dynptr_arg_type |= MEM_UNINIT; + if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) + dynptr_arg_type |= DYNPTR_TYPE_SKB; + ret = process_dynptr_func(env, regno, insn_idx, dynptr_arg_type); if (ret < 0) return ret; @@ -16356,6 +16406,17 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) { insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1); *cnt = 1; + } else if (desc->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) { + bool seen_direct_write = env->seen_direct_write; + bool is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE); + + if (is_rdonly) + insn->imm = BPF_CALL_IMM(bpf_dynptr_from_skb_rdonly); + + /* restore env->seen_direct_write to its original value, since + * may_access_direct_pkt_data mutates it + */ + env->seen_direct_write = seen_direct_write; } return 0; } diff --git a/net/core/filter.c b/net/core/filter.c index 1d6f165923bff..f3afa31a9b10c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1721,6 +1721,12 @@ static const struct bpf_func_proto bpf_skb_store_bytes_proto = { .arg5_type = ARG_ANYTHING, }; +int __bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, + u32 len, u64 flags) +{ + return ____bpf_skb_store_bytes(skb, offset, from, len, flags); +} + BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset, void *, to, u32, len) { @@ -1751,6 +1757,11 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = { .arg4_type = ARG_CONST_SIZE, }; +int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len) +{ + return ____bpf_skb_load_bytes(skb, offset, to, len); +} + BPF_CALL_4(bpf_flow_dissector_load_bytes, const struct bpf_flow_dissector *, ctx, u32, offset, void *, to, u32, len) @@ -11621,3 +11632,59 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id) return func; } + +__diag_push(); +__diag_ignore_all("-Wmissing-prototypes", + "Global functions as their definitions will be in vmlinux BTF"); +__bpf_kfunc int bpf_dynptr_from_skb(struct sk_buff *skb, u64 flags, + struct bpf_dynptr_kern *ptr__uninit) +{ + if (flags) { + bpf_dynptr_set_null(ptr__uninit); + return -EINVAL; + } + + bpf_dynptr_init(ptr__uninit, skb, BPF_DYNPTR_TYPE_SKB, 0, skb->len); + + return 0; +} +__diag_pop(); + +int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, + struct bpf_dynptr_kern *ptr__uninit) +{ + int err; + + err = bpf_dynptr_from_skb(skb, flags, ptr__uninit); + if (err) + return err; + + bpf_dynptr_set_rdonly(ptr__uninit); + + return 0; +} + +BTF_SET8_START(bpf_kfunc_check_set_skb) +BTF_ID_FLAGS(func, bpf_dynptr_from_skb) +BTF_SET8_END(bpf_kfunc_check_set_skb) + +static const struct btf_kfunc_id_set bpf_kfunc_set_skb = { + .owner = THIS_MODULE, + .set = &bpf_kfunc_check_set_skb, +}; + +static int __init bpf_kfunc_init(void) +{ + int 
ret; + + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_kfunc_set_skb); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_ACT, &bpf_kfunc_set_skb); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SK_SKB, &bpf_kfunc_set_skb); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SOCKET_FILTER, &bpf_kfunc_set_skb); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SKB, &bpf_kfunc_set_skb); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_OUT, &bpf_kfunc_set_skb); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_IN, &bpf_kfunc_set_skb); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_XMIT, &bpf_kfunc_set_skb); + return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb); +} +late_initcall(bpf_kfunc_init); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 62ce1f5d1b1d9..d0351d30e5515 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5325,11 +5325,17 @@ union bpf_attr { * Description * Write *len* bytes from *src* into *dst*, starting from *offset* * into *dst*. - * *flags* is currently unused. + * + * *flags* must be 0 except for skb-type dynptrs. + * + * For skb-type dynptrs: + * * For *flags*, please see the flags accepted by + * **bpf_skb_store_bytes**\ (). * Return * 0 on success, -E2BIG if *offset* + *len* exceeds the length * of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst* - * is a read-only dynptr or if *flags* is not 0. + * is a read-only dynptr or if *flags* is not correct. For skb-type dynptrs, + * other errors correspond to errors returned by **bpf_skb_store_bytes**\ (). * * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len) * Description @@ -5337,6 +5343,9 @@ union bpf_attr { * * *len* must be a statically known value. The returned data slice * is invalidated whenever the dynptr is invalidated. + * + * skb type dynptrs may not use bpf_dynptr_data. They should + * instead use bpf_dynptr_slice and bpf_dynptr_slice_rdwr. * Return * Pointer to the underlying dynptr data, NULL if the dynptr is * read-only, if the dynptr is invalid, or if the offset and length -- GitLab From 05421aecd4ed65da0dc17b0c3c13779ef334e9e5 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Wed, 1 Mar 2023 07:49:51 -0800 Subject: [PATCH 0092/2078] bpf: Add xdp dynptrs Add xdp dynptrs, which are dynptrs whose underlying pointer points to a xdp_buff. The dynptr acts on xdp data. xdp dynptrs have two main benefits. One is that they allow operations on sizes that are not statically known at compile-time (eg variable-sized accesses). Another is that parsing the packet data through dynptrs (instead of through direct access of xdp->data and xdp->data_end) can be more ergonomic and less brittle (eg does not need manual if checking for being within bounds of data_end). For reads and writes on the dynptr, this includes reading/writing from/to and across fragments. Data slices through the bpf_dynptr_data API are not supported; instead bpf_dynptr_slice() and bpf_dynptr_slice_rdwr() should be used. For examples of how xdp dynptrs can be used, please see the attached selftests. 
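
As a rough illustration (section name, includes and boilerplate are assumptions, not part of the patch): an XDP program can create the dynptr from its context and read headers without caring whether the bytes sit in the head or in a frag.

    #include <linux/bpf.h>
    #include <linux/if_ether.h>
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_endian.h>

    extern int bpf_dynptr_from_xdp(struct xdp_md *xdp, __u64 flags,
                                   struct bpf_dynptr *ptr__uninit) __ksym;

    SEC("xdp")
    int xdp_dynptr_parse(struct xdp_md *ctx)
    {
            struct bpf_dynptr ptr;
            struct ethhdr eth;

            if (bpf_dynptr_from_xdp(ctx, 0, &ptr))
                    return XDP_PASS;

            /* The read also works when the requested bytes straddle frags. */
            if (bpf_dynptr_read(&eth, sizeof(eth), &ptr, 0, 0))
                    return XDP_PASS;

            return eth.h_proto == bpf_htons(ETH_P_IP) ? XDP_PASS : XDP_DROP;
    }

    char _license[] SEC("license") = "GPL";
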
Signed-off-by: Joanne Koong Link: https://lore.kernel.org/r/20230301154953.641654-9-joannelkoong@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 8 +++++++- include/linux/filter.h | 14 +++++++++++++ include/uapi/linux/bpf.h | 2 +- kernel/bpf/helpers.c | 9 ++++++++- kernel/bpf/verifier.c | 10 +++++++++ net/core/filter.c | 37 ++++++++++++++++++++++++++++++++-- tools/include/uapi/linux/bpf.h | 2 +- 7 files changed, 76 insertions(+), 6 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e7436d7615b0d..23ec684e660d5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -610,11 +610,15 @@ enum bpf_type_flag { /* DYNPTR points to sk_buff */ DYNPTR_TYPE_SKB = BIT(15 + BPF_BASE_TYPE_BITS), + /* DYNPTR points to xdp_buff */ + DYNPTR_TYPE_XDP = BIT(16 + BPF_BASE_TYPE_BITS), + __BPF_TYPE_FLAG_MAX, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; -#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF | DYNPTR_TYPE_SKB) +#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF | DYNPTR_TYPE_SKB \ + | DYNPTR_TYPE_XDP) /* Max number of base types. */ #define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS) @@ -1151,6 +1155,8 @@ enum bpf_dynptr_type { BPF_DYNPTR_TYPE_RINGBUF, /* Underlying data is a sk_buff */ BPF_DYNPTR_TYPE_SKB, + /* Underlying data is a xdp_buff */ + BPF_DYNPTR_TYPE_XDP, }; int bpf_dynptr_check_size(u32 size); diff --git a/include/linux/filter.h b/include/linux/filter.h index de18e844d15ed..3f6992261ec59 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1546,6 +1546,8 @@ static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u64 index int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len); int __bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags); +int __bpf_xdp_load_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len); +int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len); #else /* CONFIG_NET */ static inline int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len) @@ -1558,6 +1560,18 @@ static inline int __bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, { return -EOPNOTSUPP; } + +static inline int __bpf_xdp_load_bytes(struct xdp_buff *xdp, u32 offset, + void *buf, u32 len) +{ + return -EOPNOTSUPP; +} + +static inline int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset, + void *buf, u32 len) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_NET */ #endif /* __LINUX_FILTER_H__ */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d0351d30e5515..faa304c926cf6 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5344,7 +5344,7 @@ union bpf_attr { * *len* must be a statically known value. The returned data slice * is invalidated whenever the dynptr is invalidated. * - * skb type dynptrs may not use bpf_dynptr_data. They should + * skb and xdp type dynptrs may not use bpf_dynptr_data. They should * instead use bpf_dynptr_slice and bpf_dynptr_slice_rdwr. 
* Return * Pointer to the underlying dynptr data, NULL if the dynptr is diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index e8e2414d1587c..114a875a05b11 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1530,6 +1530,8 @@ BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern return 0; case BPF_DYNPTR_TYPE_SKB: return __bpf_skb_load_bytes(src->data, src->offset + offset, dst, len); + case BPF_DYNPTR_TYPE_XDP: + return __bpf_xdp_load_bytes(src->data, src->offset + offset, dst, len); default: WARN_ONCE(true, "bpf_dynptr_read: unknown dynptr type %d\n", type); return -EFAULT; @@ -1576,6 +1578,10 @@ BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u32, offset, v case BPF_DYNPTR_TYPE_SKB: return __bpf_skb_store_bytes(dst->data, dst->offset + offset, src, len, flags); + case BPF_DYNPTR_TYPE_XDP: + if (flags) + return -EINVAL; + return __bpf_xdp_store_bytes(dst->data, dst->offset + offset, src, len); default: WARN_ONCE(true, "bpf_dynptr_write: unknown dynptr type %d\n", type); return -EFAULT; @@ -1615,7 +1621,8 @@ BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u3 case BPF_DYNPTR_TYPE_RINGBUF: return (unsigned long)(ptr->data + ptr->offset + offset); case BPF_DYNPTR_TYPE_SKB: - /* skb dynptrs should use bpf_dynptr_slice / bpf_dynptr_slice_rdwr */ + case BPF_DYNPTR_TYPE_XDP: + /* skb and xdp dynptrs should use bpf_dynptr_slice / bpf_dynptr_slice_rdwr */ return 0; default: WARN_ONCE(true, "bpf_dynptr_data: unknown dynptr type %d\n", type); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 4f5fce16543ba..5e42946e53abc 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -752,6 +752,8 @@ static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type) return BPF_DYNPTR_TYPE_RINGBUF; case DYNPTR_TYPE_SKB: return BPF_DYNPTR_TYPE_SKB; + case DYNPTR_TYPE_XDP: + return BPF_DYNPTR_TYPE_XDP; default: return BPF_DYNPTR_TYPE_INVALID; } @@ -6300,6 +6302,9 @@ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn case DYNPTR_TYPE_SKB: err_extra = "skb "; break; + case DYNPTR_TYPE_XDP: + err_extra = "xdp "; + break; default: err_extra = ""; break; @@ -8943,6 +8948,7 @@ enum special_kfunc_type { KF_bpf_rbtree_add, KF_bpf_rbtree_first, KF_bpf_dynptr_from_skb, + KF_bpf_dynptr_from_xdp, }; BTF_SET_START(special_kfunc_set) @@ -8958,6 +8964,7 @@ BTF_ID(func, bpf_rbtree_remove) BTF_ID(func, bpf_rbtree_add) BTF_ID(func, bpf_rbtree_first) BTF_ID(func, bpf_dynptr_from_skb) +BTF_ID(func, bpf_dynptr_from_xdp) BTF_SET_END(special_kfunc_set) BTF_ID_LIST(special_kfunc_list) @@ -8975,6 +8982,7 @@ BTF_ID(func, bpf_rbtree_remove) BTF_ID(func, bpf_rbtree_add) BTF_ID(func, bpf_rbtree_first) BTF_ID(func, bpf_dynptr_from_skb) +BTF_ID(func, bpf_dynptr_from_xdp) static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta) { @@ -9731,6 +9739,8 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) dynptr_arg_type |= DYNPTR_TYPE_SKB; + else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_xdp]) + dynptr_arg_type |= DYNPTR_TYPE_XDP; ret = process_dynptr_func(env, regno, insn_idx, dynptr_arg_type); if (ret < 0) diff --git a/net/core/filter.c b/net/core/filter.c index f3afa31a9b10c..c692046fa7f67 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3839,7 +3839,7 @@ static const struct bpf_func_proto sk_skb_change_head_proto = { .arg3_type = ARG_ANYTHING, }; 
-BPF_CALL_1(bpf_xdp_get_buff_len, struct xdp_buff*, xdp) +BPF_CALL_1(bpf_xdp_get_buff_len, struct xdp_buff*, xdp) { return xdp_get_buff_len(xdp); } @@ -3999,6 +3999,11 @@ static const struct bpf_func_proto bpf_xdp_load_bytes_proto = { .arg4_type = ARG_CONST_SIZE, }; +int __bpf_xdp_load_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len) +{ + return ____bpf_xdp_load_bytes(xdp, offset, buf, len); +} + BPF_CALL_4(bpf_xdp_store_bytes, struct xdp_buff *, xdp, u32, offset, void *, buf, u32, len) { @@ -4026,6 +4031,11 @@ static const struct bpf_func_proto bpf_xdp_store_bytes_proto = { .arg4_type = ARG_CONST_SIZE, }; +int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len) +{ + return ____bpf_xdp_store_bytes(xdp, offset, buf, len); +} + static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset) { struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); @@ -11648,6 +11658,19 @@ __bpf_kfunc int bpf_dynptr_from_skb(struct sk_buff *skb, u64 flags, return 0; } + +__bpf_kfunc int bpf_dynptr_from_xdp(struct xdp_buff *xdp, u64 flags, + struct bpf_dynptr_kern *ptr__uninit) +{ + if (flags) { + bpf_dynptr_set_null(ptr__uninit); + return -EINVAL; + } + + bpf_dynptr_init(ptr__uninit, xdp, BPF_DYNPTR_TYPE_XDP, 0, xdp_get_buff_len(xdp)); + + return 0; +} __diag_pop(); int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, @@ -11668,11 +11691,20 @@ BTF_SET8_START(bpf_kfunc_check_set_skb) BTF_ID_FLAGS(func, bpf_dynptr_from_skb) BTF_SET8_END(bpf_kfunc_check_set_skb) +BTF_SET8_START(bpf_kfunc_check_set_xdp) +BTF_ID_FLAGS(func, bpf_dynptr_from_xdp) +BTF_SET8_END(bpf_kfunc_check_set_xdp) + static const struct btf_kfunc_id_set bpf_kfunc_set_skb = { .owner = THIS_MODULE, .set = &bpf_kfunc_check_set_skb, }; +static const struct btf_kfunc_id_set bpf_kfunc_set_xdp = { + .owner = THIS_MODULE, + .set = &bpf_kfunc_check_set_xdp, +}; + static int __init bpf_kfunc_init(void) { int ret; @@ -11685,6 +11717,7 @@ static int __init bpf_kfunc_init(void) ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_OUT, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_IN, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_XMIT, &bpf_kfunc_set_skb); - return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb); + return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp); } late_initcall(bpf_kfunc_init); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index d0351d30e5515..faa304c926cf6 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5344,7 +5344,7 @@ union bpf_attr { * *len* must be a statically known value. The returned data slice * is invalidated whenever the dynptr is invalidated. * - * skb type dynptrs may not use bpf_dynptr_data. They should + * skb and xdp type dynptrs may not use bpf_dynptr_data. They should * instead use bpf_dynptr_slice and bpf_dynptr_slice_rdwr. * Return * Pointer to the underlying dynptr data, NULL if the dynptr is -- GitLab From 66e3a13e7c2c44d0c9dd6bb244680ca7529a8845 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Wed, 1 Mar 2023 07:49:52 -0800 Subject: [PATCH 0093/2078] bpf: Add bpf_dynptr_slice and bpf_dynptr_slice_rdwr Two new kfuncs are added, bpf_dynptr_slice and bpf_dynptr_slice_rdwr. 
The user must pass in a buffer to store the contents of the data slice if a direct pointer to the data cannot be obtained. For skb and xdp type dynptrs, these two APIs are the only way to obtain a data slice. However, for other types of dynptrs, there is no difference between bpf_dynptr_slice(_rdwr) and bpf_dynptr_data. For skb type dynptrs, the data is copied into the user provided buffer if any of the data is not in the linear portion of the skb. For xdp type dynptrs, the data is copied into the user provided buffer if the data is between xdp frags. If the skb is cloned and a call to bpf_dynptr_slice_rdwr is made, then the skb will be uncloned (see bpf_unclone_prologue()). Please note that any bpf_dynptr_write() automatically invalidates any prior data slices of the skb dynptr. This is because the skb may be cloned or may need to pull its paged buffer into the head. As such, any bpf_dynptr_write() will automatically have its prior data slices invalidated, even if the write is to data in the skb head of an uncloned skb. Please note as well that any other helper calls that change the underlying packet buffer (eg bpf_skb_pull_data()) invalidates any data slices of the skb dynptr as well, for the same reasons. Signed-off-by: Joanne Koong Link: https://lore.kernel.org/r/20230301154953.641654-10-joannelkoong@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 14 ++++ include/uapi/linux/bpf.h | 5 ++ kernel/bpf/helpers.c | 138 +++++++++++++++++++++++++++++ kernel/bpf/verifier.c | 127 +++++++++++++++++++++++++++++- net/core/filter.c | 6 +- tools/include/uapi/linux/bpf.h | 5 ++ 6 files changed, 288 insertions(+), 7 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 3f6992261ec59..efa5d4a1677ee 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1548,6 +1548,9 @@ int __bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags); int __bpf_xdp_load_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len); int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len); +void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len); +void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, + void *buf, unsigned long len, bool flush); #else /* CONFIG_NET */ static inline int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len) @@ -1572,6 +1575,17 @@ static inline int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset, { return -EOPNOTSUPP; } + +static inline void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len) +{ + return NULL; +} + +static inline void *bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, void *buf, + unsigned long len, bool flush) +{ + return NULL; +} #endif /* CONFIG_NET */ #endif /* __LINUX_FILTER_H__ */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index faa304c926cf6..c9699304aed2d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5329,6 +5329,11 @@ union bpf_attr { * *flags* must be 0 except for skb-type dynptrs. * * For skb-type dynptrs: + * * All data slices of the dynptr are automatically + * invalidated after **bpf_dynptr_write**\ (). This is + * because writing may pull the skb and change the + * underlying packet buffer. + * + * * For *flags*, please see the flags accepted by + * **bpf_skb_store_bytes**\ (). 
* Return diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 114a875a05b11..648b29e78b844 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2193,6 +2193,142 @@ __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid) return p; } +/** + * bpf_dynptr_slice - Obtain a read-only pointer to the dynptr data. + * + * For non-skb and non-xdp type dynptrs, there is no difference between + * bpf_dynptr_slice and bpf_dynptr_data. + * + * If the intention is to write to the data slice, please use + * bpf_dynptr_slice_rdwr. + * + * The user must check that the returned pointer is not null before using it. + * + * Please note that in the case of skb and xdp dynptrs, bpf_dynptr_slice + * does not change the underlying packet data pointers, so a call to + * bpf_dynptr_slice will not invalidate any ctx->data/data_end pointers in + * the bpf program. + * + * @ptr: The dynptr whose data slice to retrieve + * @offset: Offset into the dynptr + * @buffer: User-provided buffer to copy contents into + * @buffer__szk: Size (in bytes) of the buffer. This is the length of the + * requested slice. This must be a constant. + * + * @returns: NULL if the call failed (eg invalid dynptr), pointer to a read-only + * data slice (can be either direct pointer to the data or a pointer to the user + * provided buffer, with its contents containing the data, if unable to obtain + * direct pointer) + */ +__bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset, + void *buffer, u32 buffer__szk) +{ + enum bpf_dynptr_type type; + u32 len = buffer__szk; + int err; + + if (!ptr->data) + return 0; + + err = bpf_dynptr_check_off_len(ptr, offset, len); + if (err) + return 0; + + type = bpf_dynptr_get_type(ptr); + + switch (type) { + case BPF_DYNPTR_TYPE_LOCAL: + case BPF_DYNPTR_TYPE_RINGBUF: + return ptr->data + ptr->offset + offset; + case BPF_DYNPTR_TYPE_SKB: + return skb_header_pointer(ptr->data, ptr->offset + offset, len, buffer); + case BPF_DYNPTR_TYPE_XDP: + { + void *xdp_ptr = bpf_xdp_pointer(ptr->data, ptr->offset + offset, len); + if (xdp_ptr) + return xdp_ptr; + + bpf_xdp_copy_buf(ptr->data, ptr->offset + offset, buffer, len, false); + return buffer; + } + default: + WARN_ONCE(true, "unknown dynptr type %d\n", type); + return 0; + } +} + +/** + * bpf_dynptr_slice_rdwr - Obtain a writable pointer to the dynptr data. + * + * For non-skb and non-xdp type dynptrs, there is no difference between + * bpf_dynptr_slice and bpf_dynptr_data. + * + * The returned pointer is writable and may point to either directly the dynptr + * data at the requested offset or to the buffer if unable to obtain a direct + * data pointer to (example: the requested slice is to the paged area of an skb + * packet). In the case where the returned pointer is to the buffer, the user + * is responsible for persisting writes through calling bpf_dynptr_write(). This + * usually looks something like this pattern: + * + * struct eth_hdr *eth = bpf_dynptr_slice_rdwr(&dynptr, 0, buffer, sizeof(buffer)); + * if (!eth) + * return TC_ACT_SHOT; + * + * // mutate eth header // + * + * if (eth == buffer) + * bpf_dynptr_write(&ptr, 0, buffer, sizeof(buffer), 0); + * + * Please note that, as in the example above, the user must check that the + * returned pointer is not null before using it. 
+ * + * Please also note that in the case of skb and xdp dynptrs, bpf_dynptr_slice_rdwr + * does not change the underlying packet data pointers, so a call to + * bpf_dynptr_slice_rdwr will not invalidate any ctx->data/data_end pointers in + * the bpf program. + * + * @ptr: The dynptr whose data slice to retrieve + * @offset: Offset into the dynptr + * @buffer: User-provided buffer to copy contents into + * @buffer__szk: Size (in bytes) of the buffer. This is the length of the + * requested slice. This must be a constant. + * + * @returns: NULL if the call failed (eg invalid dynptr), pointer to a + * data slice (can be either direct pointer to the data or a pointer to the user + * provided buffer, with its contents containing the data, if unable to obtain + * direct pointer) + */ +__bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 offset, + void *buffer, u32 buffer__szk) +{ + if (!ptr->data || bpf_dynptr_is_rdonly(ptr)) + return 0; + + /* bpf_dynptr_slice_rdwr is the same logic as bpf_dynptr_slice. + * + * For skb-type dynptrs, it is safe to write into the returned pointer + * if the bpf program allows skb data writes. There are two possibilities + * that may occur when calling bpf_dynptr_slice_rdwr: + * + * 1) The requested slice is in the head of the skb. In this case, the + * returned pointer is directly to skb data, and if the skb is cloned, the + * verifier will have uncloned it (see bpf_unclone_prologue()) already. + * The pointer can be directly written into. + * + * 2) Some portion of the requested slice is in the paged buffer area. + * In this case, the requested data will be copied out into the buffer + * and the returned pointer will be a pointer to the buffer. The skb + * will not be pulled. To persist the write, the user will need to call + * bpf_dynptr_write(), which will pull the skb and commit the write. + * + * Similarly for xdp programs, if the requested slice is not across xdp + * fragments, then a direct pointer will be returned, otherwise the data + * will be copied out into the buffer and the user will need to call + * bpf_dynptr_write() to commit changes. 
+ */ + return bpf_dynptr_slice(ptr, offset, buffer, buffer__szk); +} + __bpf_kfunc void *bpf_cast_to_kern_ctx(void *obj) { return obj; @@ -2262,6 +2398,8 @@ BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx) BTF_ID_FLAGS(func, bpf_rdonly_cast) BTF_ID_FLAGS(func, bpf_rcu_read_lock) BTF_ID_FLAGS(func, bpf_rcu_read_unlock) +BTF_ID_FLAGS(func, bpf_dynptr_slice, KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL) BTF_SET8_END(common_btf_ids) static const struct btf_kfunc_id_set common_kfunc_set = { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5e42946e53abc..a856896e835a2 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -759,6 +759,22 @@ static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type) } } +static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type) +{ + switch (type) { + case BPF_DYNPTR_TYPE_LOCAL: + return DYNPTR_TYPE_LOCAL; + case BPF_DYNPTR_TYPE_RINGBUF: + return DYNPTR_TYPE_RINGBUF; + case BPF_DYNPTR_TYPE_SKB: + return DYNPTR_TYPE_SKB; + case BPF_DYNPTR_TYPE_XDP: + return DYNPTR_TYPE_XDP; + default: + return 0; + } +} + static bool dynptr_type_refcounted(enum bpf_dynptr_type type) { return type == BPF_DYNPTR_TYPE_RINGBUF; @@ -1681,6 +1697,12 @@ static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg) reg->type == PTR_TO_PACKET_END; } +static bool reg_is_dynptr_slice_pkt(const struct bpf_reg_state *reg) +{ + return base_type(reg->type) == PTR_TO_MEM && + (reg->type & DYNPTR_TYPE_SKB || reg->type & DYNPTR_TYPE_XDP); +} + /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */ static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg, enum bpf_reg_type which) @@ -7429,6 +7451,9 @@ static int check_func_proto(const struct bpf_func_proto *fn, int func_id) /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END] * are now invalid, so turn them into unknown SCALAR_VALUE. + * + * This also applies to dynptr slices belonging to skb and xdp dynptrs, + * since these slices point to packet data. 
*/ static void clear_all_pkt_pointers(struct bpf_verifier_env *env) { @@ -7436,7 +7461,7 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env) struct bpf_reg_state *reg; bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ - if (reg_is_pkt_pointer_any(reg)) + if (reg_is_pkt_pointer_any(reg) || reg_is_dynptr_slice_pkt(reg)) mark_reg_invalid(env, reg); })); } @@ -8688,6 +8713,11 @@ struct bpf_kfunc_call_arg_meta { struct { struct btf_field *field; } arg_rbtree_root; + struct { + enum bpf_dynptr_type type; + u32 id; + } initialized_dynptr; + u64 mem_size; }; static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta) @@ -8761,6 +8791,19 @@ static bool is_kfunc_arg_mem_size(const struct btf *btf, return __kfunc_param_match_suffix(btf, arg, "__sz"); } +static bool is_kfunc_arg_const_mem_size(const struct btf *btf, + const struct btf_param *arg, + const struct bpf_reg_state *reg) +{ + const struct btf_type *t; + + t = btf_type_skip_modifiers(btf, arg->type, NULL); + if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE) + return false; + + return __kfunc_param_match_suffix(btf, arg, "__szk"); +} + static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg) { return __kfunc_param_match_suffix(btf, arg, "__k"); @@ -8949,6 +8992,8 @@ enum special_kfunc_type { KF_bpf_rbtree_first, KF_bpf_dynptr_from_skb, KF_bpf_dynptr_from_xdp, + KF_bpf_dynptr_slice, + KF_bpf_dynptr_slice_rdwr, }; BTF_SET_START(special_kfunc_set) @@ -8965,6 +9010,8 @@ BTF_ID(func, bpf_rbtree_add) BTF_ID(func, bpf_rbtree_first) BTF_ID(func, bpf_dynptr_from_skb) BTF_ID(func, bpf_dynptr_from_xdp) +BTF_ID(func, bpf_dynptr_slice) +BTF_ID(func, bpf_dynptr_slice_rdwr) BTF_SET_END(special_kfunc_set) BTF_ID_LIST(special_kfunc_list) @@ -8983,6 +9030,8 @@ BTF_ID(func, bpf_rbtree_add) BTF_ID(func, bpf_rbtree_first) BTF_ID(func, bpf_dynptr_from_skb) BTF_ID(func, bpf_dynptr_from_xdp) +BTF_ID(func, bpf_dynptr_slice) +BTF_ID(func, bpf_dynptr_slice_rdwr) static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta) { @@ -9062,7 +9111,10 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, if (is_kfunc_arg_callback(env, meta->btf, &args[argno])) return KF_ARG_PTR_TO_CALLBACK; - if (argno + 1 < nargs && is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], ®s[regno + 1])) + + if (argno + 1 < nargs && + (is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], ®s[regno + 1]) || + is_kfunc_arg_const_mem_size(meta->btf, &args[argno + 1], ®s[regno + 1]))) arg_mem_size = true; /* This is the catch all argument type of register types supported by @@ -9745,6 +9797,18 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ ret = process_dynptr_func(env, regno, insn_idx, dynptr_arg_type); if (ret < 0) return ret; + + if (!(dynptr_arg_type & MEM_UNINIT)) { + int id = dynptr_id(env, reg); + + if (id < 0) { + verbose(env, "verifier internal error: failed to obtain dynptr id\n"); + return id; + } + meta->initialized_dynptr.id = id; + meta->initialized_dynptr.type = dynptr_get_type(env, reg); + } + break; } case KF_ARG_PTR_TO_LIST_HEAD: @@ -9840,14 +9904,33 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ return ret; break; case KF_ARG_PTR_TO_MEM_SIZE: - ret = check_kfunc_mem_size_reg(env, ®s[regno + 1], regno + 1); + { + struct bpf_reg_state *size_reg = ®s[regno + 1]; + const struct btf_param *size_arg = &args[i + 1]; + + ret = check_kfunc_mem_size_reg(env, size_reg, regno + 1); if (ret < 0) { verbose(env, "arg#%d arg#%d memory, len pair 
leads to invalid memory access\n", i, i + 1); return ret; } - /* Skip next '__sz' argument */ + + if (is_kfunc_arg_const_mem_size(meta->btf, size_arg, size_reg)) { + if (meta->arg_constant.found) { + verbose(env, "verifier internal error: only one constant argument permitted\n"); + return -EFAULT; + } + if (!tnum_is_const(size_reg->var_off)) { + verbose(env, "R%d must be a known constant\n", regno + 1); + return -EINVAL; + } + meta->arg_constant.found = true; + meta->arg_constant.value = size_reg->var_off.value; + } + + /* Skip next '__sz' or '__szk' argument */ i++; break; + } case KF_ARG_PTR_TO_CALLBACK: meta->subprogno = reg->subprogno; break; @@ -10082,6 +10165,42 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED; regs[BPF_REG_0].btf = desc_btf; regs[BPF_REG_0].btf_id = meta.arg_constant.value; + } else if (meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice] || + meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) { + enum bpf_type_flag type_flag = get_dynptr_type_flag(meta.initialized_dynptr.type); + + mark_reg_known_zero(env, regs, BPF_REG_0); + + if (!meta.arg_constant.found) { + verbose(env, "verifier internal error: bpf_dynptr_slice(_rdwr) no constant size\n"); + return -EFAULT; + } + + regs[BPF_REG_0].mem_size = meta.arg_constant.value; + + /* PTR_MAYBE_NULL will be added when is_kfunc_ret_null is checked */ + regs[BPF_REG_0].type = PTR_TO_MEM | type_flag; + + if (meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice]) { + regs[BPF_REG_0].type |= MEM_RDONLY; + } else { + /* this will set env->seen_direct_write to true */ + if (!may_access_direct_pkt_data(env, NULL, BPF_WRITE)) { + verbose(env, "the prog does not allow writes to packet data\n"); + return -EINVAL; + } + } + + if (!meta.initialized_dynptr.id) { + verbose(env, "verifier internal error: no dynptr id\n"); + return -EFAULT; + } + regs[BPF_REG_0].dynptr_id = meta.initialized_dynptr.id; + + /* we don't need to set BPF_REG_0's ref obj id + * because packet slices are not refcounted (see + * dynptr_type_refcounted) + */ } else { verbose(env, "kernel function %s unhandled dynamic return type\n", meta.func_name); diff --git a/net/core/filter.c b/net/core/filter.c index c692046fa7f67..8f3124e061330 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3894,8 +3894,8 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = { .arg2_type = ARG_ANYTHING, }; -static void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, - void *buf, unsigned long len, bool flush) +void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, + void *buf, unsigned long len, bool flush) { unsigned long ptr_len, ptr_off = 0; skb_frag_t *next_frag, *end_frag; @@ -3941,7 +3941,7 @@ static void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, } } -static void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len) +void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len) { struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); u32 size = xdp->data_end - xdp->data; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index faa304c926cf6..c9699304aed2d 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5329,6 +5329,11 @@ union bpf_attr { * *flags* must be 0 except for skb-type dynptrs. * * For skb-type dynptrs: + * * All data slices of the dynptr are automatically + * invalidated after **bpf_dynptr_write**\ (). 
This is + * because writing may pull the skb and change the + * underlying packet buffer. + * * * For *flags*, please see the flags accepted by * **bpf_skb_store_bytes**\ (). * Return -- GitLab From cfa7b011894d689cccfa88a25da324fa5c34e4ed Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Wed, 1 Mar 2023 07:49:53 -0800 Subject: [PATCH 0094/2078] selftests/bpf: tests for using dynptrs to parse skb and xdp buffers Test skb and xdp dynptr functionality in the following ways: 1) progs/test_cls_redirect_dynptr.c * Rewrite "progs/test_cls_redirect.c" test to use dynptrs to parse skb data * This is a great example of how dynptrs can be used to simplify a lot of the parsing logic for non-statically known values. When measuring the user + system time between the original version vs. using dynptrs, and averaging the time for 10 runs (using "time ./test_progs -t cls_redirect"): original version: 0.092 sec with dynptrs: 0.078 sec 2) progs/test_xdp_dynptr.c * Rewrite "progs/test_xdp.c" test to use dynptrs to parse xdp data When measuring the user + system time between the original version vs. using dynptrs, and averaging the time for 10 runs (using "time ./test_progs -t xdp_attach"): original version: 0.118 sec with dynptrs: 0.094 sec 3) progs/test_l4lb_noinline_dynptr.c * Rewrite "progs/test_l4lb_noinline.c" test to use dynptrs to parse skb data When measuring the user + system time between the original version vs. using dynptrs, and averaging the time for 10 runs (using "time ./test_progs -t l4lb_all"): original version: 0.062 sec with dynptrs: 0.081 sec For number of processed verifier instructions: original version: 6268 insns with dynptrs: 2588 insns 4) progs/test_parse_tcp_hdr_opt_dynptr.c * Add sample code for parsing tcp hdr opt lookup using dynptrs. This logic is lifted from a real-world use case of packet parsing in katran [0], a layer 4 load balancer. The original version "progs/test_parse_tcp_hdr_opt.c" (not using dynptrs) is included here as well, for comparison. When measuring the user + system time between the original version vs. using dynptrs, and averaging the time for 10 runs (using "time ./test_progs -t parse_tcp_hdr_opt"): original version: 0.031 sec with dynptrs: 0.045 sec 5) progs/dynptr_success.c * Add test case "test_skb_readonly" for testing attempts at writes on a prog type with read-only skb ctx. * Add "test_dynptr_skb_data" for testing that bpf_dynptr_data isn't supported for skb progs. 6) progs/dynptr_fail.c * Add test cases "skb_invalid_data_slice{1,2,3,4}" and "xdp_invalid_data_slice{1,2}" for testing that helpers that modify the underlying packet buffer automatically invalidate the associated data slice. * Add test cases "skb_invalid_ctx" and "xdp_invalid_ctx" for testing that prog types that do not support bpf_dynptr_from_skb/xdp don't have access to the API. * Add test case "dynptr_slice_var_len{1,2}" for testing that variable-sized len can't be passed in to bpf_dynptr_slice * Add test case "skb_invalid_slice_write" for testing that writes to a read-only data slice are rejected by the verifier. * Add test case "data_slice_out_of_bounds_skb" for testing that writes to an area outside the slice are rejected. * Add test case "invalid_slice_rdwr_rdonly" for testing that prog types that don't allow writes to packet data don't accept any calls to bpf_dynptr_slice_rdwr. 
[0] https://github.com/facebookincubator/katran/blob/main/katran/lib/bpf/pckt_parsing.h Signed-off-by: Joanne Koong Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230301154953.641654-11-joannelkoong@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/DENYLIST.s390x | 2 + tools/testing/selftests/bpf/bpf_kfuncs.h | 38 + .../selftests/bpf/prog_tests/cls_redirect.c | 25 + .../testing/selftests/bpf/prog_tests/dynptr.c | 74 +- .../selftests/bpf/prog_tests/l4lb_all.c | 2 + .../bpf/prog_tests/parse_tcp_hdr_opt.c | 93 ++ .../selftests/bpf/prog_tests/xdp_attach.c | 11 +- .../testing/selftests/bpf/progs/dynptr_fail.c | 287 ++++- .../selftests/bpf/progs/dynptr_success.c | 55 +- .../bpf/progs/test_cls_redirect_dynptr.c | 980 ++++++++++++++++++ .../bpf/progs/test_l4lb_noinline_dynptr.c | 487 +++++++++ .../bpf/progs/test_parse_tcp_hdr_opt.c | 119 +++ .../bpf/progs/test_parse_tcp_hdr_opt_dynptr.c | 114 ++ .../selftests/bpf/progs/test_xdp_dynptr.c | 257 +++++ .../selftests/bpf/test_tcp_hdr_options.h | 1 + 15 files changed, 2522 insertions(+), 23 deletions(-) create mode 100644 tools/testing/selftests/bpf/bpf_kfuncs.h create mode 100644 tools/testing/selftests/bpf/prog_tests/parse_tcp_hdr_opt.c create mode 100644 tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c create mode 100644 tools/testing/selftests/bpf/progs/test_l4lb_noinline_dynptr.c create mode 100644 tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c create mode 100644 tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c create mode 100644 tools/testing/selftests/bpf/progs/test_xdp_dynptr.c diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index b89eb87034e49..a02a085e7f32b 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -4,6 +4,8 @@ bloom_filter_map # failed to find kernel BTF type ID of bpf_cookie # failed to open_and_load program: -524 (trampoline) bpf_loop # attaches to __x64_sys_nanosleep cgrp_local_storage # prog_attach unexpected error: -524 (trampoline) +dynptr/test_dynptr_skb_data +dynptr/test_skb_readonly fexit_sleep # fexit_skel_load fexit skeleton failed (trampoline) get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace) kprobe_multi_bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95 diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h new file mode 100644 index 0000000000000..8c993ec8ceea6 --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -0,0 +1,38 @@ +#ifndef __BPF_KFUNCS__ +#define __BPF_KFUNCS__ + +/* Description + * Initializes an skb-type dynptr + * Returns + * Error code + */ +extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags, + struct bpf_dynptr *ptr__uninit) __ksym; + +/* Description + * Initializes an xdp-type dynptr + * Returns + * Error code + */ +extern int bpf_dynptr_from_xdp(struct xdp_md *xdp, __u64 flags, + struct bpf_dynptr *ptr__uninit) __ksym; + +/* Description + * Obtain a read-only pointer to the dynptr's data + * Returns + * Either a direct pointer to the dynptr data or a pointer to the user-provided + * buffer if unable to obtain a direct pointer + */ +extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset, + void *buffer, __u32 buffer__szk) __ksym; + +/* Description + * Obtain a read-write pointer to the dynptr's data + * Returns + * Either a direct pointer to the dynptr data or a pointer to the 
user-provided + * buffer if unable to obtain a direct pointer + */ +extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u32 offset, + void *buffer, __u32 buffer__szk) __ksym; + +#endif diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c index 224f016b0a537..2a55f717fc07a 100644 --- a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c @@ -13,6 +13,7 @@ #include "progs/test_cls_redirect.h" #include "test_cls_redirect.skel.h" +#include "test_cls_redirect_dynptr.skel.h" #include "test_cls_redirect_subprogs.skel.h" #define ENCAP_IP INADDR_LOOPBACK @@ -446,6 +447,28 @@ cleanup: close_fds((int *)conns, sizeof(conns) / sizeof(conns[0][0])); } +static void test_cls_redirect_dynptr(void) +{ + struct test_cls_redirect_dynptr *skel; + int err; + + skel = test_cls_redirect_dynptr__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP); + skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT); + + err = test_cls_redirect_dynptr__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + test_cls_redirect_common(skel->progs.cls_redirect); + +cleanup: + test_cls_redirect_dynptr__destroy(skel); +} + static void test_cls_redirect_inlined(void) { struct test_cls_redirect *skel; @@ -496,4 +519,6 @@ void test_cls_redirect(void) test_cls_redirect_inlined(); if (test__start_subtest("cls_redirect_subprogs")) test_cls_redirect_subprogs(); + if (test__start_subtest("cls_redirect_dynptr")) + test_cls_redirect_dynptr(); } diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c index b99264ec0d9ca..d176c34a7d2e4 100644 --- a/tools/testing/selftests/bpf/prog_tests/dynptr.c +++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c @@ -2,20 +2,32 @@ /* Copyright (c) 2022 Facebook */ #include +#include #include "dynptr_fail.skel.h" #include "dynptr_success.skel.h" -static const char * const success_tests[] = { - "test_read_write", - "test_data_slice", - "test_ringbuf", +enum test_setup_type { + SETUP_SYSCALL_SLEEP, + SETUP_SKB_PROG, }; -static void verify_success(const char *prog_name) +static struct { + const char *prog_name; + enum test_setup_type type; +} success_tests[] = { + {"test_read_write", SETUP_SYSCALL_SLEEP}, + {"test_dynptr_data", SETUP_SYSCALL_SLEEP}, + {"test_ringbuf", SETUP_SYSCALL_SLEEP}, + {"test_skb_readonly", SETUP_SKB_PROG}, + {"test_dynptr_skb_data", SETUP_SKB_PROG}, +}; + +static void verify_success(const char *prog_name, enum test_setup_type setup_type) { struct dynptr_success *skel; struct bpf_program *prog; struct bpf_link *link; + int err; skel = dynptr_success__open(); if (!ASSERT_OK_PTR(skel, "dynptr_success__open")) @@ -23,23 +35,53 @@ static void verify_success(const char *prog_name) skel->bss->pid = getpid(); - dynptr_success__load(skel); - if (!ASSERT_OK_PTR(skel, "dynptr_success__load")) - goto cleanup; - prog = bpf_object__find_program_by_name(skel->obj, prog_name); if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) goto cleanup; - link = bpf_program__attach(prog); - if (!ASSERT_OK_PTR(link, "bpf_program__attach")) + bpf_program__set_autoload(prog, true); + + err = dynptr_success__load(skel); + if (!ASSERT_OK(err, "dynptr_success__load")) goto cleanup; - usleep(1); + switch (setup_type) { + case SETUP_SYSCALL_SLEEP: + link = bpf_program__attach(prog); + if (!ASSERT_OK_PTR(link, "bpf_program__attach")) + goto cleanup; 
- ASSERT_EQ(skel->bss->err, 0, "err"); + usleep(1); + + bpf_link__destroy(link); + break; + case SETUP_SKB_PROG: + { + int prog_fd; + char buf[64]; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .data_out = buf, + .data_size_out = sizeof(buf), + .repeat = 1, + ); - bpf_link__destroy(link); + prog_fd = bpf_program__fd(prog); + if (!ASSERT_GE(prog_fd, 0, "prog_fd")) + goto cleanup; + + err = bpf_prog_test_run_opts(prog_fd, &topts); + + if (!ASSERT_OK(err, "test_run")) + goto cleanup; + + break; + } + } + + ASSERT_EQ(skel->bss->err, 0, "err"); cleanup: dynptr_success__destroy(skel); @@ -50,10 +92,10 @@ void test_dynptr(void) int i; for (i = 0; i < ARRAY_SIZE(success_tests); i++) { - if (!test__start_subtest(success_tests[i])) + if (!test__start_subtest(success_tests[i].prog_name)) continue; - verify_success(success_tests[i]); + verify_success(success_tests[i].prog_name, success_tests[i].type); } RUN_TESTS(dynptr_fail); diff --git a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c index 9c1a18573ffdb..1eab286b14fe6 100644 --- a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c +++ b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c @@ -93,4 +93,6 @@ void test_l4lb_all(void) test_l4lb("test_l4lb.bpf.o"); if (test__start_subtest("l4lb_noinline")) test_l4lb("test_l4lb_noinline.bpf.o"); + if (test__start_subtest("l4lb_noinline_dynptr")) + test_l4lb("test_l4lb_noinline_dynptr.bpf.o"); } diff --git a/tools/testing/selftests/bpf/prog_tests/parse_tcp_hdr_opt.c b/tools/testing/selftests/bpf/prog_tests/parse_tcp_hdr_opt.c new file mode 100644 index 0000000000000..daa952711d8fd --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/parse_tcp_hdr_opt.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include "test_parse_tcp_hdr_opt.skel.h" +#include "test_parse_tcp_hdr_opt_dynptr.skel.h" +#include "test_tcp_hdr_options.h" + +struct test_pkt { + struct ipv6_packet pk6_v6; + u8 options[16]; +} __packed; + +struct test_pkt pkt = { + .pk6_v6.eth.h_proto = __bpf_constant_htons(ETH_P_IPV6), + .pk6_v6.iph.nexthdr = IPPROTO_TCP, + .pk6_v6.iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), + .pk6_v6.tcp.urg_ptr = 123, + .pk6_v6.tcp.doff = 9, /* 16 bytes of options */ + + .options = { + TCPOPT_MSS, 4, 0x05, 0xB4, TCPOPT_NOP, TCPOPT_NOP, + 0, 6, 0xBB, 0xBB, 0xBB, 0xBB, TCPOPT_EOL + }, +}; + +static void test_parse_opt(void) +{ + struct test_parse_tcp_hdr_opt *skel; + struct bpf_program *prog; + char buf[128]; + int err; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt, + .data_size_in = sizeof(pkt), + .data_out = buf, + .data_size_out = sizeof(buf), + .repeat = 3, + ); + + skel = test_parse_tcp_hdr_opt__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + pkt.options[6] = skel->rodata->tcp_hdr_opt_kind_tpr; + prog = skel->progs.xdp_ingress_v6; + + err = bpf_prog_test_run_opts(bpf_program__fd(prog), &topts); + ASSERT_OK(err, "ipv6 test_run"); + ASSERT_EQ(topts.retval, XDP_PASS, "ipv6 test_run retval"); + ASSERT_EQ(skel->bss->server_id, 0xBBBBBBBB, "server id"); + + test_parse_tcp_hdr_opt__destroy(skel); +} + +static void test_parse_opt_dynptr(void) +{ + struct test_parse_tcp_hdr_opt_dynptr *skel; + struct bpf_program *prog; + char buf[128]; + int err; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt, + .data_size_in = sizeof(pkt), + .data_out = buf, + .data_size_out = sizeof(buf), + .repeat = 3, + ); + + skel = 
test_parse_tcp_hdr_opt_dynptr__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + pkt.options[6] = skel->rodata->tcp_hdr_opt_kind_tpr; + prog = skel->progs.xdp_ingress_v6; + + err = bpf_prog_test_run_opts(bpf_program__fd(prog), &topts); + ASSERT_OK(err, "ipv6 test_run"); + ASSERT_EQ(topts.retval, XDP_PASS, "ipv6 test_run retval"); + ASSERT_EQ(skel->bss->server_id, 0xBBBBBBBB, "server id"); + + test_parse_tcp_hdr_opt_dynptr__destroy(skel); +} + +void test_parse_tcp_hdr_opt(void) +{ + if (test__start_subtest("parse_tcp_hdr_opt")) + test_parse_opt(); + if (test__start_subtest("parse_tcp_hdr_opt_dynptr")) + test_parse_opt_dynptr(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c index d4cd9f873c14a..fa3cac5488f5d 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c @@ -4,11 +4,10 @@ #define IFINDEX_LO 1 #define XDP_FLAGS_REPLACE (1U << 4) -void serial_test_xdp_attach(void) +static void test_xdp_attach(const char *file) { __u32 duration = 0, id1, id2, id0 = 0, len; struct bpf_object *obj1, *obj2, *obj3; - const char *file = "./test_xdp.bpf.o"; struct bpf_prog_info info = {}; int err, fd1, fd2, fd3; LIBBPF_OPTS(bpf_xdp_attach_opts, opts); @@ -85,3 +84,11 @@ out_2: out_1: bpf_object__close(obj1); } + +void serial_test_xdp_attach(void) +{ + if (test__start_subtest("xdp_attach")) + test_xdp_attach("./test_xdp.bpf.o"); + if (test__start_subtest("xdp_attach_dynptr")) + test_xdp_attach("./test_xdp_dynptr.bpf.o"); +} diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c index aa5b69354b910..20ce920d891da 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_fail.c +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -5,7 +5,9 @@ #include #include #include +#include #include "bpf_misc.h" +#include "bpf_kfuncs.h" char _license[] SEC("license") = "GPL"; @@ -244,6 +246,27 @@ done: return 0; } +/* A data slice can't be accessed out of bounds */ +SEC("?tc") +__failure __msg("value is outside of the allowed memory range") +int data_slice_out_of_bounds_skb(struct __sk_buff *skb) +{ + struct bpf_dynptr ptr; + struct ethhdr *hdr; + char buffer[sizeof(*hdr)] = {}; + + bpf_dynptr_from_skb(skb, 0, &ptr); + + hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer)); + if (!hdr) + return SK_DROP; + + /* this should fail */ + *(__u8*)(hdr + 1) = 1; + + return SK_PASS; +} + SEC("?raw_tp") __failure __msg("value is outside of the allowed memory range") int data_slice_out_of_bounds_map_value(void *ctx) @@ -399,7 +422,6 @@ int invalid_helper2(void *ctx) /* this should fail */ bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 8, 0, 0); - return 0; } @@ -1044,6 +1066,193 @@ int dynptr_read_into_slot(void *ctx) return 0; } +/* bpf_dynptr_slice()s are read-only and cannot be written to */ +SEC("?tc") +__failure __msg("R0 cannot write into rdonly_mem") +int skb_invalid_slice_write(struct __sk_buff *skb) +{ + struct bpf_dynptr ptr; + struct ethhdr *hdr; + char buffer[sizeof(*hdr)] = {}; + + bpf_dynptr_from_skb(skb, 0, &ptr); + + hdr = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer)); + if (!hdr) + return SK_DROP; + + /* this should fail */ + hdr->h_proto = 1; + + return SK_PASS; +} + +/* The read-only data slice is invalidated whenever a helper changes packet data */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int skb_invalid_data_slice1(struct __sk_buff *skb) +{ + 
struct bpf_dynptr ptr; + struct ethhdr *hdr; + char buffer[sizeof(*hdr)] = {}; + + bpf_dynptr_from_skb(skb, 0, &ptr); + + hdr = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer)); + if (!hdr) + return SK_DROP; + + val = hdr->h_proto; + + if (bpf_skb_pull_data(skb, skb->len)) + return SK_DROP; + + /* this should fail */ + val = hdr->h_proto; + + return SK_PASS; +} + +/* The read-write data slice is invalidated whenever a helper changes packet data */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int skb_invalid_data_slice2(struct __sk_buff *skb) +{ + struct bpf_dynptr ptr; + struct ethhdr *hdr; + char buffer[sizeof(*hdr)] = {}; + + bpf_dynptr_from_skb(skb, 0, &ptr); + + hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer)); + if (!hdr) + return SK_DROP; + + hdr->h_proto = 123; + + if (bpf_skb_pull_data(skb, skb->len)) + return SK_DROP; + + /* this should fail */ + hdr->h_proto = 1; + + return SK_PASS; +} + +/* The read-only data slice is invalidated whenever bpf_dynptr_write() is called */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int skb_invalid_data_slice3(struct __sk_buff *skb) +{ + char write_data[64] = "hello there, world!!"; + struct bpf_dynptr ptr; + struct ethhdr *hdr; + char buffer[sizeof(*hdr)] = {}; + + bpf_dynptr_from_skb(skb, 0, &ptr); + + hdr = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer)); + if (!hdr) + return SK_DROP; + + val = hdr->h_proto; + + bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0); + + /* this should fail */ + val = hdr->h_proto; + + return SK_PASS; +} + +/* The read-write data slice is invalidated whenever bpf_dynptr_write() is called */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int skb_invalid_data_slice4(struct __sk_buff *skb) +{ + char write_data[64] = "hello there, world!!"; + struct bpf_dynptr ptr; + struct ethhdr *hdr; + char buffer[sizeof(*hdr)] = {}; + + bpf_dynptr_from_skb(skb, 0, &ptr); + hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer)); + if (!hdr) + return SK_DROP; + + hdr->h_proto = 123; + + bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0); + + /* this should fail */ + hdr->h_proto = 1; + + return SK_PASS; +} + +/* The read-only data slice is invalidated whenever a helper changes packet data */ +SEC("?xdp") +__failure __msg("invalid mem access 'scalar'") +int xdp_invalid_data_slice1(struct xdp_md *xdp) +{ + struct bpf_dynptr ptr; + struct ethhdr *hdr; + char buffer[sizeof(*hdr)] = {}; + + bpf_dynptr_from_xdp(xdp, 0, &ptr); + hdr = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer)); + if (!hdr) + return SK_DROP; + + val = hdr->h_proto; + + if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(*hdr))) + return XDP_DROP; + + /* this should fail */ + val = hdr->h_proto; + + return XDP_PASS; +} + +/* The read-write data slice is invalidated whenever a helper changes packet data */ +SEC("?xdp") +__failure __msg("invalid mem access 'scalar'") +int xdp_invalid_data_slice2(struct xdp_md *xdp) +{ + struct bpf_dynptr ptr; + struct ethhdr *hdr; + char buffer[sizeof(*hdr)] = {}; + + bpf_dynptr_from_xdp(xdp, 0, &ptr); + hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer)); + if (!hdr) + return SK_DROP; + + hdr->h_proto = 9; + + if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(*hdr))) + return XDP_DROP; + + /* this should fail */ + hdr->h_proto = 1; + + return XDP_PASS; +} + +/* Only supported prog type can create skb-type dynptrs */ +SEC("?raw_tp") +__failure __msg("calling kernel function bpf_dynptr_from_skb is not allowed") +int skb_invalid_ctx(void *ctx) +{ + struct 
bpf_dynptr ptr; + + /* this should fail */ + bpf_dynptr_from_skb(ctx, 0, &ptr); + + return 0; +} + /* Reject writes to dynptr slot for uninit arg */ SEC("?raw_tp") __failure __msg("potential write to dynptr at off=-16") @@ -1061,6 +1270,61 @@ int uninit_write_into_slot(void *ctx) return 0; } +/* Only supported prog type can create xdp-type dynptrs */ +SEC("?raw_tp") +__failure __msg("calling kernel function bpf_dynptr_from_xdp is not allowed") +int xdp_invalid_ctx(void *ctx) +{ + struct bpf_dynptr ptr; + + /* this should fail */ + bpf_dynptr_from_xdp(ctx, 0, &ptr); + + return 0; +} + +__u32 hdr_size = sizeof(struct ethhdr); +/* Can't pass in variable-sized len to bpf_dynptr_slice */ +SEC("?tc") +__failure __msg("unbounded memory access") +int dynptr_slice_var_len1(struct __sk_buff *skb) +{ + struct bpf_dynptr ptr; + struct ethhdr *hdr; + char buffer[sizeof(*hdr)] = {}; + + bpf_dynptr_from_skb(skb, 0, &ptr); + + /* this should fail */ + hdr = bpf_dynptr_slice(&ptr, 0, buffer, hdr_size); + if (!hdr) + return SK_DROP; + + return SK_PASS; +} + +/* Can't pass in variable-sized len to bpf_dynptr_slice */ +SEC("?tc") +__failure __msg("must be a known constant") +int dynptr_slice_var_len2(struct __sk_buff *skb) +{ + char buffer[sizeof(struct ethhdr)] = {}; + struct bpf_dynptr ptr; + struct ethhdr *hdr; + + bpf_dynptr_from_skb(skb, 0, &ptr); + + if (hdr_size <= sizeof(buffer)) { + /* this should fail */ + hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, hdr_size); + if (!hdr) + return SK_DROP; + hdr->h_proto = 12; + } + + return SK_PASS; +} + static int callback(__u32 index, void *data) { *(__u32 *)data = 123; @@ -1092,3 +1356,24 @@ int invalid_data_slices(void *ctx) return 0; } + +/* Program types that don't allow writes to packet data should fail if + * bpf_dynptr_slice_rdwr is called + */ +SEC("cgroup_skb/ingress") +__failure __msg("the prog does not allow writes to packet data") +int invalid_slice_rdwr_rdonly(struct __sk_buff *skb) +{ + char buffer[sizeof(struct ethhdr)] = {}; + struct bpf_dynptr ptr; + struct ethhdr *hdr; + + bpf_dynptr_from_skb(skb, 0, &ptr); + + /* this should fail since cgroup_skb doesn't allow + * changing packet data + */ + hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer)); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c index 35db7c6c1fc74..c8358a7c79245 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_success.c +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -5,6 +5,7 @@ #include #include #include "bpf_misc.h" +#include "bpf_kfuncs.h" #include "errno.h" char _license[] SEC("license") = "GPL"; @@ -30,7 +31,7 @@ struct { __type(value, __u32); } array_map SEC(".maps"); -SEC("tp/syscalls/sys_enter_nanosleep") +SEC("?tp/syscalls/sys_enter_nanosleep") int test_read_write(void *ctx) { char write_data[64] = "hello there, world!!"; @@ -61,8 +62,8 @@ int test_read_write(void *ctx) return 0; } -SEC("tp/syscalls/sys_enter_nanosleep") -int test_data_slice(void *ctx) +SEC("?tp/syscalls/sys_enter_nanosleep") +int test_dynptr_data(void *ctx) { __u32 key = 0, val = 235, *map_val; struct bpf_dynptr ptr; @@ -131,7 +132,7 @@ static int ringbuf_callback(__u32 index, void *data) return 0; } -SEC("tp/syscalls/sys_enter_nanosleep") +SEC("?tp/syscalls/sys_enter_nanosleep") int test_ringbuf(void *ctx) { struct bpf_dynptr ptr; @@ -163,3 +164,49 @@ done: bpf_ringbuf_discard_dynptr(&ptr, 0); return 0; } + +SEC("?cgroup_skb/egress") +int test_skb_readonly(struct __sk_buff *skb) +{ + __u8 
write_data[2] = {1, 2}; + struct bpf_dynptr ptr; + __u64 *data; + int ret; + + if (bpf_dynptr_from_skb(skb, 0, &ptr)) { + err = 1; + return 1; + } + + /* since cgroup skbs are read only, writes should fail */ + ret = bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0); + if (ret != -EINVAL) { + err = 2; + return 1; + } + + return 1; +} + +SEC("?cgroup_skb/egress") +int test_dynptr_skb_data(struct __sk_buff *skb) +{ + __u8 write_data[2] = {1, 2}; + struct bpf_dynptr ptr; + __u64 *data; + int ret; + + if (bpf_dynptr_from_skb(skb, 0, &ptr)) { + err = 1; + return 1; + } + + /* This should return NULL. Must use bpf_dynptr_slice API */ + data = bpf_dynptr_data(&ptr, 0, 1); + if (data) { + err = 2; + return 1; + } + + return 1; +} diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c new file mode 100644 index 0000000000000..f45a7095de7a7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c @@ -0,0 +1,980 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +// Copyright (c) 2019, 2020 Cloudflare + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "test_cls_redirect.h" +#include "bpf_kfuncs.h" + +#define offsetofend(TYPE, MEMBER) \ + (offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER))) + +#define IP_OFFSET_MASK (0x1FFF) +#define IP_MF (0x2000) + +char _license[] SEC("license") = "Dual BSD/GPL"; + +/** + * Destination port and IP used for UDP encapsulation. + */ +volatile const __be16 ENCAPSULATION_PORT; +volatile const __be32 ENCAPSULATION_IP; + +typedef struct { + uint64_t processed_packets_total; + uint64_t l3_protocol_packets_total_ipv4; + uint64_t l3_protocol_packets_total_ipv6; + uint64_t l4_protocol_packets_total_tcp; + uint64_t l4_protocol_packets_total_udp; + uint64_t accepted_packets_total_syn; + uint64_t accepted_packets_total_syn_cookies; + uint64_t accepted_packets_total_last_hop; + uint64_t accepted_packets_total_icmp_echo_request; + uint64_t accepted_packets_total_established; + uint64_t forwarded_packets_total_gue; + uint64_t forwarded_packets_total_gre; + + uint64_t errors_total_unknown_l3_proto; + uint64_t errors_total_unknown_l4_proto; + uint64_t errors_total_malformed_ip; + uint64_t errors_total_fragmented_ip; + uint64_t errors_total_malformed_icmp; + uint64_t errors_total_unwanted_icmp; + uint64_t errors_total_malformed_icmp_pkt_too_big; + uint64_t errors_total_malformed_tcp; + uint64_t errors_total_malformed_udp; + uint64_t errors_total_icmp_echo_replies; + uint64_t errors_total_malformed_encapsulation; + uint64_t errors_total_encap_adjust_failed; + uint64_t errors_total_encap_buffer_too_small; + uint64_t errors_total_redirect_loop; + uint64_t errors_total_encap_mtu_violate; +} metrics_t; + +typedef enum { + INVALID = 0, + UNKNOWN, + ECHO_REQUEST, + SYN, + SYN_COOKIE, + ESTABLISHED, +} verdict_t; + +typedef struct { + uint16_t src, dst; +} flow_ports_t; + +_Static_assert( + sizeof(flow_ports_t) != + offsetofend(struct bpf_sock_tuple, ipv4.dport) - + offsetof(struct bpf_sock_tuple, ipv4.sport) - 1, + "flow_ports_t must match sport and dport in struct bpf_sock_tuple"); +_Static_assert( + sizeof(flow_ports_t) != + offsetofend(struct bpf_sock_tuple, ipv6.dport) - + offsetof(struct bpf_sock_tuple, ipv6.sport) - 1, + "flow_ports_t must match sport and dport in struct bpf_sock_tuple"); + +struct iphdr_info { + void *hdr; + __u64 len; 
+}; + +typedef int ret_t; + +/* This is a bit of a hack. We need a return value which allows us to + * indicate that the regular flow of the program should continue, + * while allowing functions to use XDP_PASS and XDP_DROP, etc. + */ +static const ret_t CONTINUE_PROCESSING = -1; + +/* Convenience macro to call functions which return ret_t. + */ +#define MAYBE_RETURN(x) \ + do { \ + ret_t __ret = x; \ + if (__ret != CONTINUE_PROCESSING) \ + return __ret; \ + } while (0) + +static bool ipv4_is_fragment(const struct iphdr *ip) +{ + uint16_t frag_off = ip->frag_off & bpf_htons(IP_OFFSET_MASK); + return (ip->frag_off & bpf_htons(IP_MF)) != 0 || frag_off > 0; +} + +static int pkt_parse_ipv4(struct bpf_dynptr *dynptr, __u64 *offset, struct iphdr *iphdr) +{ + if (bpf_dynptr_read(iphdr, sizeof(*iphdr), dynptr, *offset, 0)) + return -1; + + *offset += sizeof(*iphdr); + + if (iphdr->ihl < 5) + return -1; + + /* skip ipv4 options */ + *offset += (iphdr->ihl - 5) * 4; + + return 0; +} + +/* Parse the L4 ports from a packet, assuming a layout like TCP or UDP. */ +static bool pkt_parse_icmp_l4_ports(struct bpf_dynptr *dynptr, __u64 *offset, flow_ports_t *ports) +{ + if (bpf_dynptr_read(ports, sizeof(*ports), dynptr, *offset, 0)) + return false; + + *offset += sizeof(*ports); + + /* Ports in the L4 headers are reversed, since we are parsing an ICMP + * payload which is going towards the eyeball. + */ + uint16_t dst = ports->src; + ports->src = ports->dst; + ports->dst = dst; + return true; +} + +static uint16_t pkt_checksum_fold(uint32_t csum) +{ + /* The highest reasonable value for an IPv4 header + * checksum requires two folds, so we just do that always. + */ + csum = (csum & 0xffff) + (csum >> 16); + csum = (csum & 0xffff) + (csum >> 16); + return (uint16_t)~csum; +} + +static void pkt_ipv4_checksum(struct iphdr *iph) +{ + iph->check = 0; + + /* An IP header without options is 20 bytes. Two of those + * are the checksum, which we always set to zero. Hence, + * the maximum accumulated value is 18 / 2 * 0xffff = 0x8fff7, + * which fits in 32 bit. + */ + _Static_assert(sizeof(struct iphdr) == 20, "iphdr must be 20 bytes"); + uint32_t acc = 0; + uint16_t *ipw = (uint16_t *)iph; + + for (size_t i = 0; i < sizeof(struct iphdr) / 2; i++) + acc += ipw[i]; + + iph->check = pkt_checksum_fold(acc); +} + +static bool pkt_skip_ipv6_extension_headers(struct bpf_dynptr *dynptr, __u64 *offset, + const struct ipv6hdr *ipv6, uint8_t *upper_proto, + bool *is_fragment) +{ + /* We understand five extension headers. + * https://tools.ietf.org/html/rfc8200#section-4.1 states that all + * headers should occur once, except Destination Options, which may + * occur twice. Hence we give up after 6 headers. + */ + struct { + uint8_t next; + uint8_t len; + } exthdr = { + .next = ipv6->nexthdr, + }; + *is_fragment = false; + + for (int i = 0; i < 6; i++) { + switch (exthdr.next) { + case IPPROTO_FRAGMENT: + *is_fragment = true; + /* NB: We don't check that hdrlen == 0 as per spec. */ + /* fallthrough; */ + + case IPPROTO_HOPOPTS: + case IPPROTO_ROUTING: + case IPPROTO_DSTOPTS: + case IPPROTO_MH: + if (bpf_dynptr_read(&exthdr, sizeof(exthdr), dynptr, *offset, 0)) + return false; + + /* hdrlen is in 8-octet units, and excludes the first 8 octets. */ + *offset += (exthdr.len + 1) * 8; + + /* Decode next header */ + break; + + default: + /* The next header is not one of the known extension + * headers, treat it as the upper layer header. + * + * This handles IPPROTO_NONE. 
+ * + * Encapsulating Security Payload (50) and Authentication + * Header (51) also end up here (and will trigger an + * unknown proto error later). They have a custom header + * format and seem too esoteric to care about. + */ + *upper_proto = exthdr.next; + return true; + } + } + + /* We never found an upper layer header. */ + return false; +} + +static int pkt_parse_ipv6(struct bpf_dynptr *dynptr, __u64 *offset, struct ipv6hdr *ipv6, + uint8_t *proto, bool *is_fragment) +{ + if (bpf_dynptr_read(ipv6, sizeof(*ipv6), dynptr, *offset, 0)) + return -1; + + *offset += sizeof(*ipv6); + + if (!pkt_skip_ipv6_extension_headers(dynptr, offset, ipv6, proto, is_fragment)) + return -1; + + return 0; +} + +/* Global metrics, per CPU + */ +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, unsigned int); + __type(value, metrics_t); +} metrics_map SEC(".maps"); + +static metrics_t *get_global_metrics(void) +{ + uint64_t key = 0; + return bpf_map_lookup_elem(&metrics_map, &key); +} + +static ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap) +{ + const int payload_off = + sizeof(*encap) + + sizeof(struct in_addr) * encap->unigue.hop_count; + int32_t encap_overhead = payload_off - sizeof(struct ethhdr); + + /* Changing the ethertype if the encapsulated packet is ipv6 */ + if (encap->gue.proto_ctype == IPPROTO_IPV6) + encap->eth.h_proto = bpf_htons(ETH_P_IPV6); + + if (bpf_skb_adjust_room(skb, -encap_overhead, BPF_ADJ_ROOM_MAC, + BPF_F_ADJ_ROOM_FIXED_GSO | + BPF_F_ADJ_ROOM_NO_CSUM_RESET) || + bpf_csum_level(skb, BPF_CSUM_LEVEL_DEC)) + return TC_ACT_SHOT; + + return bpf_redirect(skb->ifindex, BPF_F_INGRESS); +} + +static ret_t forward_with_gre(struct __sk_buff *skb, struct bpf_dynptr *dynptr, + encap_headers_t *encap, struct in_addr *next_hop, + metrics_t *metrics) +{ + const int payload_off = + sizeof(*encap) + + sizeof(struct in_addr) * encap->unigue.hop_count; + int32_t encap_overhead = + payload_off - sizeof(struct ethhdr) - sizeof(struct iphdr); + int32_t delta = sizeof(struct gre_base_hdr) - encap_overhead; + __u8 encap_buffer[sizeof(encap_gre_t)] = {}; + uint16_t proto = ETH_P_IP; + uint32_t mtu_len = 0; + encap_gre_t *encap_gre; + + metrics->forwarded_packets_total_gre++; + + /* Loop protection: the inner packet's TTL is decremented as a safeguard + * against any forwarding loop. As the only interesting field is the TTL + * hop limit for IPv6, it is easier to use bpf_skb_load_bytes/bpf_skb_store_bytes + * as they handle the split packets if needed (no need for the data to be + * in the linear section). + */ + if (encap->gue.proto_ctype == IPPROTO_IPV6) { + proto = ETH_P_IPV6; + uint8_t ttl; + int rc; + + rc = bpf_skb_load_bytes( + skb, payload_off + offsetof(struct ipv6hdr, hop_limit), + &ttl, 1); + if (rc != 0) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + + if (ttl == 0) { + metrics->errors_total_redirect_loop++; + return TC_ACT_SHOT; + } + + ttl--; + rc = bpf_skb_store_bytes( + skb, payload_off + offsetof(struct ipv6hdr, hop_limit), + &ttl, 1, 0); + if (rc != 0) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + } else { + uint8_t ttl; + int rc; + + rc = bpf_skb_load_bytes( + skb, payload_off + offsetof(struct iphdr, ttl), &ttl, + 1); + if (rc != 0) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + + if (ttl == 0) { + metrics->errors_total_redirect_loop++; + return TC_ACT_SHOT; + } + + /* IPv4 also has a checksum to patch. 
While the TTL is only one byte, + * this function only works for 2 and 4 bytes arguments (the result is + * the same). + */ + rc = bpf_l3_csum_replace( + skb, payload_off + offsetof(struct iphdr, check), ttl, + ttl - 1, 2); + if (rc != 0) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + + ttl--; + rc = bpf_skb_store_bytes( + skb, payload_off + offsetof(struct iphdr, ttl), &ttl, 1, + 0); + if (rc != 0) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + } + + if (bpf_check_mtu(skb, skb->ifindex, &mtu_len, delta, 0)) { + metrics->errors_total_encap_mtu_violate++; + return TC_ACT_SHOT; + } + + if (bpf_skb_adjust_room(skb, delta, BPF_ADJ_ROOM_NET, + BPF_F_ADJ_ROOM_FIXED_GSO | + BPF_F_ADJ_ROOM_NO_CSUM_RESET) || + bpf_csum_level(skb, BPF_CSUM_LEVEL_INC)) { + metrics->errors_total_encap_adjust_failed++; + return TC_ACT_SHOT; + } + + if (bpf_skb_pull_data(skb, sizeof(encap_gre_t))) { + metrics->errors_total_encap_buffer_too_small++; + return TC_ACT_SHOT; + } + + encap_gre = bpf_dynptr_slice_rdwr(dynptr, 0, encap_buffer, sizeof(encap_buffer)); + if (!encap_gre) { + metrics->errors_total_encap_buffer_too_small++; + return TC_ACT_SHOT; + } + + encap_gre->ip.protocol = IPPROTO_GRE; + encap_gre->ip.daddr = next_hop->s_addr; + encap_gre->ip.saddr = ENCAPSULATION_IP; + encap_gre->ip.tot_len = + bpf_htons(bpf_ntohs(encap_gre->ip.tot_len) + delta); + encap_gre->gre.flags = 0; + encap_gre->gre.protocol = bpf_htons(proto); + pkt_ipv4_checksum((void *)&encap_gre->ip); + + if (encap_gre == encap_buffer) + bpf_dynptr_write(dynptr, 0, encap_buffer, sizeof(encap_buffer), 0); + + return bpf_redirect(skb->ifindex, 0); +} + +static ret_t forward_to_next_hop(struct __sk_buff *skb, struct bpf_dynptr *dynptr, + encap_headers_t *encap, struct in_addr *next_hop, + metrics_t *metrics) +{ + /* swap L2 addresses */ + /* This assumes that packets are received from a router. + * So just swapping the MAC addresses here will make the packet go back to + * the router, which will send it to the appropriate machine. + */ + unsigned char temp[ETH_ALEN]; + memcpy(temp, encap->eth.h_dest, sizeof(temp)); + memcpy(encap->eth.h_dest, encap->eth.h_source, + sizeof(encap->eth.h_dest)); + memcpy(encap->eth.h_source, temp, sizeof(encap->eth.h_source)); + + if (encap->unigue.next_hop == encap->unigue.hop_count - 1 && + encap->unigue.last_hop_gre) { + return forward_with_gre(skb, dynptr, encap, next_hop, metrics); + } + + metrics->forwarded_packets_total_gue++; + uint32_t old_saddr = encap->ip.saddr; + encap->ip.saddr = encap->ip.daddr; + encap->ip.daddr = next_hop->s_addr; + if (encap->unigue.next_hop < encap->unigue.hop_count) { + encap->unigue.next_hop++; + } + + /* Remove ip->saddr, add next_hop->s_addr */ + const uint64_t off = offsetof(typeof(*encap), ip.check); + int ret = bpf_l3_csum_replace(skb, off, old_saddr, next_hop->s_addr, 4); + if (ret < 0) { + return TC_ACT_SHOT; + } + + return bpf_redirect(skb->ifindex, 0); +} + +static ret_t skip_next_hops(__u64 *offset, int n) +{ + __u32 res; + switch (n) { + case 1: + *offset += sizeof(struct in_addr); + case 0: + return CONTINUE_PROCESSING; + + default: + return TC_ACT_SHOT; + } +} + +/* Get the next hop from the GLB header. + * + * Sets next_hop->s_addr to 0 if there are no more hops left. + * pkt is positioned just after the variable length GLB header + * iff the call is successful. 
+ */ +static ret_t get_next_hop(struct bpf_dynptr *dynptr, __u64 *offset, encap_headers_t *encap, + struct in_addr *next_hop) +{ + if (encap->unigue.next_hop > encap->unigue.hop_count) + return TC_ACT_SHOT; + + /* Skip "used" next hops. */ + MAYBE_RETURN(skip_next_hops(offset, encap->unigue.next_hop)); + + if (encap->unigue.next_hop == encap->unigue.hop_count) { + /* No more next hops, we are at the end of the GLB header. */ + next_hop->s_addr = 0; + return CONTINUE_PROCESSING; + } + + if (bpf_dynptr_read(next_hop, sizeof(*next_hop), dynptr, *offset, 0)) + return TC_ACT_SHOT; + + *offset += sizeof(*next_hop); + + /* Skip the remainig next hops (may be zero). */ + return skip_next_hops(offset, encap->unigue.hop_count - encap->unigue.next_hop - 1); +} + +/* Fill a bpf_sock_tuple to be used with the socket lookup functions. + * This is a kludge that let's us work around verifier limitations: + * + * fill_tuple(&t, foo, sizeof(struct iphdr), 123, 321) + * + * clang will substitue a costant for sizeof, which allows the verifier + * to track it's value. Based on this, it can figure out the constant + * return value, and calling code works while still being "generic" to + * IPv4 and IPv6. + */ +static uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph, + uint64_t iphlen, uint16_t sport, uint16_t dport) +{ + switch (iphlen) { + case sizeof(struct iphdr): { + struct iphdr *ipv4 = (struct iphdr *)iph; + tuple->ipv4.daddr = ipv4->daddr; + tuple->ipv4.saddr = ipv4->saddr; + tuple->ipv4.sport = sport; + tuple->ipv4.dport = dport; + return sizeof(tuple->ipv4); + } + + case sizeof(struct ipv6hdr): { + struct ipv6hdr *ipv6 = (struct ipv6hdr *)iph; + memcpy(&tuple->ipv6.daddr, &ipv6->daddr, + sizeof(tuple->ipv6.daddr)); + memcpy(&tuple->ipv6.saddr, &ipv6->saddr, + sizeof(tuple->ipv6.saddr)); + tuple->ipv6.sport = sport; + tuple->ipv6.dport = dport; + return sizeof(tuple->ipv6); + } + + default: + return 0; + } +} + +static verdict_t classify_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, + uint64_t tuplen, void *iph, struct tcphdr *tcp) +{ + struct bpf_sock *sk = + bpf_skc_lookup_tcp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0); + + if (sk == NULL) + return UNKNOWN; + + if (sk->state != BPF_TCP_LISTEN) { + bpf_sk_release(sk); + return ESTABLISHED; + } + + if (iph != NULL && tcp != NULL) { + /* Kludge: we've run out of arguments, but need the length of the ip header. 
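+ * Recover it from the tuple length instead: an IPv6-sized tuple implies an
+ * IPv6 header.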
*/ + uint64_t iphlen = sizeof(struct iphdr); + + if (tuplen == sizeof(tuple->ipv6)) + iphlen = sizeof(struct ipv6hdr); + + if (bpf_tcp_check_syncookie(sk, iph, iphlen, tcp, + sizeof(*tcp)) == 0) { + bpf_sk_release(sk); + return SYN_COOKIE; + } + } + + bpf_sk_release(sk); + return UNKNOWN; +} + +static verdict_t classify_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, uint64_t tuplen) +{ + struct bpf_sock *sk = + bpf_sk_lookup_udp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0); + + if (sk == NULL) + return UNKNOWN; + + if (sk->state == BPF_TCP_ESTABLISHED) { + bpf_sk_release(sk); + return ESTABLISHED; + } + + bpf_sk_release(sk); + return UNKNOWN; +} + +static verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto, struct bpf_sock_tuple *tuple, + uint64_t tuplen, metrics_t *metrics) +{ + switch (proto) { + case IPPROTO_TCP: + return classify_tcp(skb, tuple, tuplen, NULL, NULL); + + case IPPROTO_UDP: + return classify_udp(skb, tuple, tuplen); + + default: + metrics->errors_total_malformed_icmp++; + return INVALID; + } +} + +static verdict_t process_icmpv4(struct __sk_buff *skb, struct bpf_dynptr *dynptr, __u64 *offset, + metrics_t *metrics) +{ + struct icmphdr icmp; + struct iphdr ipv4; + + if (bpf_dynptr_read(&icmp, sizeof(icmp), dynptr, *offset, 0)) { + metrics->errors_total_malformed_icmp++; + return INVALID; + } + + *offset += sizeof(icmp); + + /* We should never receive encapsulated echo replies. */ + if (icmp.type == ICMP_ECHOREPLY) { + metrics->errors_total_icmp_echo_replies++; + return INVALID; + } + + if (icmp.type == ICMP_ECHO) + return ECHO_REQUEST; + + if (icmp.type != ICMP_DEST_UNREACH || icmp.code != ICMP_FRAG_NEEDED) { + metrics->errors_total_unwanted_icmp++; + return INVALID; + } + + if (pkt_parse_ipv4(dynptr, offset, &ipv4)) { + metrics->errors_total_malformed_icmp_pkt_too_big++; + return INVALID; + } + + /* The source address in the outer IP header is from the entity that + * originated the ICMP message. Use the original IP header to restore + * the correct flow tuple. + */ + struct bpf_sock_tuple tuple; + tuple.ipv4.saddr = ipv4.daddr; + tuple.ipv4.daddr = ipv4.saddr; + + if (!pkt_parse_icmp_l4_ports(dynptr, offset, (flow_ports_t *)&tuple.ipv4.sport)) { + metrics->errors_total_malformed_icmp_pkt_too_big++; + return INVALID; + } + + return classify_icmp(skb, ipv4.protocol, &tuple, + sizeof(tuple.ipv4), metrics); +} + +static verdict_t process_icmpv6(struct bpf_dynptr *dynptr, __u64 *offset, struct __sk_buff *skb, + metrics_t *metrics) +{ + struct bpf_sock_tuple tuple; + struct ipv6hdr ipv6; + struct icmp6hdr icmp6; + bool is_fragment; + uint8_t l4_proto; + + if (bpf_dynptr_read(&icmp6, sizeof(icmp6), dynptr, *offset, 0)) { + metrics->errors_total_malformed_icmp++; + return INVALID; + } + + /* We should never receive encapsulated echo replies. */ + if (icmp6.icmp6_type == ICMPV6_ECHO_REPLY) { + metrics->errors_total_icmp_echo_replies++; + return INVALID; + } + + if (icmp6.icmp6_type == ICMPV6_ECHO_REQUEST) { + return ECHO_REQUEST; + } + + if (icmp6.icmp6_type != ICMPV6_PKT_TOOBIG) { + metrics->errors_total_unwanted_icmp++; + return INVALID; + } + + if (pkt_parse_ipv6(dynptr, offset, &ipv6, &l4_proto, &is_fragment)) { + metrics->errors_total_malformed_icmp_pkt_too_big++; + return INVALID; + } + + if (is_fragment) { + metrics->errors_total_fragmented_ip++; + return INVALID; + } + + /* Swap source and dest addresses. 
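+ * The embedded header belongs to a packet that we originally sent, so
+ * reversing it yields the tuple of the local flow this ICMP error refers to.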
*/ + memcpy(&tuple.ipv6.saddr, &ipv6.daddr, sizeof(tuple.ipv6.saddr)); + memcpy(&tuple.ipv6.daddr, &ipv6.saddr, sizeof(tuple.ipv6.daddr)); + + if (!pkt_parse_icmp_l4_ports(dynptr, offset, (flow_ports_t *)&tuple.ipv6.sport)) { + metrics->errors_total_malformed_icmp_pkt_too_big++; + return INVALID; + } + + return classify_icmp(skb, l4_proto, &tuple, sizeof(tuple.ipv6), + metrics); +} + +static verdict_t process_tcp(struct bpf_dynptr *dynptr, __u64 *offset, struct __sk_buff *skb, + struct iphdr_info *info, metrics_t *metrics) +{ + struct bpf_sock_tuple tuple; + struct tcphdr tcp; + uint64_t tuplen; + + metrics->l4_protocol_packets_total_tcp++; + + if (bpf_dynptr_read(&tcp, sizeof(tcp), dynptr, *offset, 0)) { + metrics->errors_total_malformed_tcp++; + return INVALID; + } + + *offset += sizeof(tcp); + + if (tcp.syn) + return SYN; + + tuplen = fill_tuple(&tuple, info->hdr, info->len, tcp.source, tcp.dest); + return classify_tcp(skb, &tuple, tuplen, info->hdr, &tcp); +} + +static verdict_t process_udp(struct bpf_dynptr *dynptr, __u64 *offset, struct __sk_buff *skb, + struct iphdr_info *info, metrics_t *metrics) +{ + struct bpf_sock_tuple tuple; + struct udphdr udph; + uint64_t tuplen; + + metrics->l4_protocol_packets_total_udp++; + + if (bpf_dynptr_read(&udph, sizeof(udph), dynptr, *offset, 0)) { + metrics->errors_total_malformed_udp++; + return INVALID; + } + *offset += sizeof(udph); + + tuplen = fill_tuple(&tuple, info->hdr, info->len, udph.source, udph.dest); + return classify_udp(skb, &tuple, tuplen); +} + +static verdict_t process_ipv4(struct __sk_buff *skb, struct bpf_dynptr *dynptr, + __u64 *offset, metrics_t *metrics) +{ + struct iphdr ipv4; + struct iphdr_info info = { + .hdr = &ipv4, + .len = sizeof(ipv4), + }; + + metrics->l3_protocol_packets_total_ipv4++; + + if (pkt_parse_ipv4(dynptr, offset, &ipv4)) { + metrics->errors_total_malformed_ip++; + return INVALID; + } + + if (ipv4.version != 4) { + metrics->errors_total_malformed_ip++; + return INVALID; + } + + if (ipv4_is_fragment(&ipv4)) { + metrics->errors_total_fragmented_ip++; + return INVALID; + } + + switch (ipv4.protocol) { + case IPPROTO_ICMP: + return process_icmpv4(skb, dynptr, offset, metrics); + + case IPPROTO_TCP: + return process_tcp(dynptr, offset, skb, &info, metrics); + + case IPPROTO_UDP: + return process_udp(dynptr, offset, skb, &info, metrics); + + default: + metrics->errors_total_unknown_l4_proto++; + return INVALID; + } +} + +static verdict_t process_ipv6(struct __sk_buff *skb, struct bpf_dynptr *dynptr, + __u64 *offset, metrics_t *metrics) +{ + struct ipv6hdr ipv6; + struct iphdr_info info = { + .hdr = &ipv6, + .len = sizeof(ipv6), + }; + uint8_t l4_proto; + bool is_fragment; + + metrics->l3_protocol_packets_total_ipv6++; + + if (pkt_parse_ipv6(dynptr, offset, &ipv6, &l4_proto, &is_fragment)) { + metrics->errors_total_malformed_ip++; + return INVALID; + } + + if (ipv6.version != 6) { + metrics->errors_total_malformed_ip++; + return INVALID; + } + + if (is_fragment) { + metrics->errors_total_fragmented_ip++; + return INVALID; + } + + switch (l4_proto) { + case IPPROTO_ICMPV6: + return process_icmpv6(dynptr, offset, skb, metrics); + + case IPPROTO_TCP: + return process_tcp(dynptr, offset, skb, &info, metrics); + + case IPPROTO_UDP: + return process_udp(dynptr, offset, skb, &info, metrics); + + default: + metrics->errors_total_unknown_l4_proto++; + return INVALID; + } +} + +SEC("tc") +int cls_redirect(struct __sk_buff *skb) +{ + __u8 encap_buffer[sizeof(encap_headers_t)] = {}; + struct bpf_dynptr dynptr; + struct 
in_addr next_hop; + /* Tracks offset of the dynptr. This will be unnecessary once + * bpf_dynptr_advance() is available. + */ + __u64 off = 0; + ret_t ret; + + bpf_dynptr_from_skb(skb, 0, &dynptr); + + metrics_t *metrics = get_global_metrics(); + if (metrics == NULL) + return TC_ACT_SHOT; + + metrics->processed_packets_total++; + + /* Pass bogus packets as long as we're not sure they're + * destined for us. + */ + if (skb->protocol != bpf_htons(ETH_P_IP)) + return TC_ACT_OK; + + encap_headers_t *encap; + + /* Make sure that all encapsulation headers are available in + * the linear portion of the skb. This makes it easy to manipulate them. + */ + if (bpf_skb_pull_data(skb, sizeof(*encap))) + return TC_ACT_OK; + + encap = bpf_dynptr_slice_rdwr(&dynptr, 0, encap_buffer, sizeof(encap_buffer)); + if (!encap) + return TC_ACT_OK; + + off += sizeof(*encap); + + if (encap->ip.ihl != 5) + /* We never have any options. */ + return TC_ACT_OK; + + if (encap->ip.daddr != ENCAPSULATION_IP || + encap->ip.protocol != IPPROTO_UDP) + return TC_ACT_OK; + + /* TODO Check UDP length? */ + if (encap->udp.dest != ENCAPSULATION_PORT) + return TC_ACT_OK; + + /* We now know that the packet is destined to us, we can + * drop bogus ones. + */ + if (ipv4_is_fragment((void *)&encap->ip)) { + metrics->errors_total_fragmented_ip++; + return TC_ACT_SHOT; + } + + if (encap->gue.variant != 0) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + + if (encap->gue.control != 0) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + + if (encap->gue.flags != 0) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + + if (encap->gue.hlen != + sizeof(encap->unigue) / 4 + encap->unigue.hop_count) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + + if (encap->unigue.version != 0) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + + if (encap->unigue.reserved != 0) + return TC_ACT_SHOT; + + MAYBE_RETURN(get_next_hop(&dynptr, &off, encap, &next_hop)); + + if (next_hop.s_addr == 0) { + metrics->accepted_packets_total_last_hop++; + return accept_locally(skb, encap); + } + + verdict_t verdict; + switch (encap->gue.proto_ctype) { + case IPPROTO_IPIP: + verdict = process_ipv4(skb, &dynptr, &off, metrics); + break; + + case IPPROTO_IPV6: + verdict = process_ipv6(skb, &dynptr, &off, metrics); + break; + + default: + metrics->errors_total_unknown_l3_proto++; + return TC_ACT_SHOT; + } + + switch (verdict) { + case INVALID: + /* metrics have already been bumped */ + return TC_ACT_SHOT; + + case UNKNOWN: + return forward_to_next_hop(skb, &dynptr, encap, &next_hop, metrics); + + case ECHO_REQUEST: + metrics->accepted_packets_total_icmp_echo_request++; + break; + + case SYN: + if (encap->unigue.forward_syn) { + return forward_to_next_hop(skb, &dynptr, encap, &next_hop, + metrics); + } + + metrics->accepted_packets_total_syn++; + break; + + case SYN_COOKIE: + metrics->accepted_packets_total_syn_cookies++; + break; + + case ESTABLISHED: + metrics->accepted_packets_total_established++; + break; + } + + ret = accept_locally(skb, encap); + + if (encap == encap_buffer) + bpf_dynptr_write(&dynptr, 0, encap_buffer, sizeof(encap_buffer), 0); + + return ret; +} diff --git a/tools/testing/selftests/bpf/progs/test_l4lb_noinline_dynptr.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline_dynptr.c new file mode 100644 index 0000000000000..f997f50807487 --- /dev/null +++ 
b/tools/testing/selftests/bpf/progs/test_l4lb_noinline_dynptr.c @@ -0,0 +1,487 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2017 Facebook +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "test_iptunnel_common.h" +#include + +#include "bpf_kfuncs.h" + +static __always_inline __u32 rol32(__u32 word, unsigned int shift) +{ + return (word << shift) | (word >> ((-shift) & 31)); +} + +/* copy paste of jhash from kernel sources to make sure llvm + * can compile it into valid sequence of bpf instructions + */ +#define __jhash_mix(a, b, c) \ +{ \ + a -= c; a ^= rol32(c, 4); c += b; \ + b -= a; b ^= rol32(a, 6); a += c; \ + c -= b; c ^= rol32(b, 8); b += a; \ + a -= c; a ^= rol32(c, 16); c += b; \ + b -= a; b ^= rol32(a, 19); a += c; \ + c -= b; c ^= rol32(b, 4); b += a; \ +} + +#define __jhash_final(a, b, c) \ +{ \ + c ^= b; c -= rol32(b, 14); \ + a ^= c; a -= rol32(c, 11); \ + b ^= a; b -= rol32(a, 25); \ + c ^= b; c -= rol32(b, 16); \ + a ^= c; a -= rol32(c, 4); \ + b ^= a; b -= rol32(a, 14); \ + c ^= b; c -= rol32(b, 24); \ +} + +#define JHASH_INITVAL 0xdeadbeef + +typedef unsigned int u32; + +static __noinline u32 jhash(const void *key, u32 length, u32 initval) +{ + u32 a, b, c; + const unsigned char *k = key; + + a = b = c = JHASH_INITVAL + length + initval; + + while (length > 12) { + a += *(u32 *)(k); + b += *(u32 *)(k + 4); + c += *(u32 *)(k + 8); + __jhash_mix(a, b, c); + length -= 12; + k += 12; + } + switch (length) { + case 12: c += (u32)k[11]<<24; + case 11: c += (u32)k[10]<<16; + case 10: c += (u32)k[9]<<8; + case 9: c += k[8]; + case 8: b += (u32)k[7]<<24; + case 7: b += (u32)k[6]<<16; + case 6: b += (u32)k[5]<<8; + case 5: b += k[4]; + case 4: a += (u32)k[3]<<24; + case 3: a += (u32)k[2]<<16; + case 2: a += (u32)k[1]<<8; + case 1: a += k[0]; + __jhash_final(a, b, c); + case 0: /* Nothing left to add */ + break; + } + + return c; +} + +static __noinline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) +{ + a += initval; + b += initval; + c += initval; + __jhash_final(a, b, c); + return c; +} + +static __noinline u32 jhash_2words(u32 a, u32 b, u32 initval) +{ + return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); +} + +#define PCKT_FRAGMENTED 65343 +#define IPV4_HDR_LEN_NO_OPT 20 +#define IPV4_PLUS_ICMP_HDR 28 +#define IPV6_PLUS_ICMP_HDR 48 +#define RING_SIZE 2 +#define MAX_VIPS 12 +#define MAX_REALS 5 +#define CTL_MAP_SIZE 16 +#define CH_RINGS_SIZE (MAX_VIPS * RING_SIZE) +#define F_IPV6 (1 << 0) +#define F_HASH_NO_SRC_PORT (1 << 0) +#define F_ICMP (1 << 0) +#define F_SYN_SET (1 << 1) + +struct packet_description { + union { + __be32 src; + __be32 srcv6[4]; + }; + union { + __be32 dst; + __be32 dstv6[4]; + }; + union { + __u32 ports; + __u16 port16[2]; + }; + __u8 proto; + __u8 flags; +}; + +struct ctl_value { + union { + __u64 value; + __u32 ifindex; + __u8 mac[6]; + }; +}; + +struct vip_meta { + __u32 flags; + __u32 vip_num; +}; + +struct real_definition { + union { + __be32 dst; + __be32 dstv6[4]; + }; + __u8 flags; +}; + +struct vip_stats { + __u64 bytes; + __u64 pkts; +}; + +struct eth_hdr { + unsigned char eth_dest[ETH_ALEN]; + unsigned char eth_source[ETH_ALEN]; + unsigned short eth_proto; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, MAX_VIPS); + __type(key, struct vip); + __type(value, struct vip_meta); +} vip_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, CH_RINGS_SIZE); + 
__type(key, __u32); + __type(value, __u32); +} ch_rings SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, MAX_REALS); + __type(key, __u32); + __type(value, struct real_definition); +} reals SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, MAX_VIPS); + __type(key, __u32); + __type(value, struct vip_stats); +} stats SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, CTL_MAP_SIZE); + __type(key, __u32); + __type(value, struct ctl_value); +} ctl_array SEC(".maps"); + +static __noinline __u32 get_packet_hash(struct packet_description *pckt, bool ipv6) +{ + if (ipv6) + return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS), + pckt->ports, CH_RINGS_SIZE); + else + return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE); +} + +static __noinline bool get_packet_dst(struct real_definition **real, + struct packet_description *pckt, + struct vip_meta *vip_info, + bool is_ipv6) +{ + __u32 hash = get_packet_hash(pckt, is_ipv6); + __u32 key = RING_SIZE * vip_info->vip_num + hash % RING_SIZE; + __u32 *real_pos; + + if (hash != 0x358459b7 /* jhash of ipv4 packet */ && + hash != 0x2f4bc6bb /* jhash of ipv6 packet */) + return false; + + real_pos = bpf_map_lookup_elem(&ch_rings, &key); + if (!real_pos) + return false; + key = *real_pos; + *real = bpf_map_lookup_elem(&reals, &key); + if (!(*real)) + return false; + return true; +} + +static __noinline int parse_icmpv6(struct bpf_dynptr *skb_ptr, __u64 off, + struct packet_description *pckt) +{ + __u8 buffer[sizeof(struct ipv6hdr)] = {}; + struct icmp6hdr *icmp_hdr; + struct ipv6hdr *ip6h; + + icmp_hdr = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer)); + if (!icmp_hdr) + return TC_ACT_SHOT; + + if (icmp_hdr->icmp6_type != ICMPV6_PKT_TOOBIG) + return TC_ACT_OK; + off += sizeof(struct icmp6hdr); + ip6h = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer)); + if (!ip6h) + return TC_ACT_SHOT; + pckt->proto = ip6h->nexthdr; + pckt->flags |= F_ICMP; + memcpy(pckt->srcv6, ip6h->daddr.s6_addr32, 16); + memcpy(pckt->dstv6, ip6h->saddr.s6_addr32, 16); + return TC_ACT_UNSPEC; +} + +static __noinline int parse_icmp(struct bpf_dynptr *skb_ptr, __u64 off, + struct packet_description *pckt) +{ + __u8 buffer_icmp[sizeof(struct iphdr)] = {}; + __u8 buffer_ip[sizeof(struct iphdr)] = {}; + struct icmphdr *icmp_hdr; + struct iphdr *iph; + + icmp_hdr = bpf_dynptr_slice(skb_ptr, off, buffer_icmp, sizeof(buffer_icmp)); + if (!icmp_hdr) + return TC_ACT_SHOT; + if (icmp_hdr->type != ICMP_DEST_UNREACH || + icmp_hdr->code != ICMP_FRAG_NEEDED) + return TC_ACT_OK; + off += sizeof(struct icmphdr); + iph = bpf_dynptr_slice(skb_ptr, off, buffer_ip, sizeof(buffer_ip)); + if (!iph || iph->ihl != 5) + return TC_ACT_SHOT; + pckt->proto = iph->protocol; + pckt->flags |= F_ICMP; + pckt->src = iph->daddr; + pckt->dst = iph->saddr; + return TC_ACT_UNSPEC; +} + +static __noinline bool parse_udp(struct bpf_dynptr *skb_ptr, __u64 off, + struct packet_description *pckt) +{ + __u8 buffer[sizeof(struct udphdr)] = {}; + struct udphdr *udp; + + udp = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer)); + if (!udp) + return false; + + if (!(pckt->flags & F_ICMP)) { + pckt->port16[0] = udp->source; + pckt->port16[1] = udp->dest; + } else { + pckt->port16[0] = udp->dest; + pckt->port16[1] = udp->source; + } + return true; +} + +static __noinline bool parse_tcp(struct bpf_dynptr *skb_ptr, __u64 off, + struct packet_description *pckt) +{ + __u8 buffer[sizeof(struct tcphdr)] = {}; + struct 
tcphdr *tcp; + + tcp = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer)); + if (!tcp) + return false; + + if (tcp->syn) + pckt->flags |= F_SYN_SET; + + if (!(pckt->flags & F_ICMP)) { + pckt->port16[0] = tcp->source; + pckt->port16[1] = tcp->dest; + } else { + pckt->port16[0] = tcp->dest; + pckt->port16[1] = tcp->source; + } + return true; +} + +static __noinline int process_packet(struct bpf_dynptr *skb_ptr, + struct eth_hdr *eth, __u64 off, + bool is_ipv6, struct __sk_buff *skb) +{ + struct packet_description pckt = {}; + struct bpf_tunnel_key tkey = {}; + struct vip_stats *data_stats; + struct real_definition *dst; + struct vip_meta *vip_info; + struct ctl_value *cval; + __u32 v4_intf_pos = 1; + __u32 v6_intf_pos = 2; + struct ipv6hdr *ip6h; + struct vip vip = {}; + struct iphdr *iph; + int tun_flag = 0; + __u16 pkt_bytes; + __u64 iph_len; + __u32 ifindex; + __u8 protocol; + __u32 vip_num; + int action; + + tkey.tunnel_ttl = 64; + if (is_ipv6) { + __u8 buffer[sizeof(struct ipv6hdr)] = {}; + + ip6h = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer)); + if (!ip6h) + return TC_ACT_SHOT; + + iph_len = sizeof(struct ipv6hdr); + protocol = ip6h->nexthdr; + pckt.proto = protocol; + pkt_bytes = bpf_ntohs(ip6h->payload_len); + off += iph_len; + if (protocol == IPPROTO_FRAGMENT) { + return TC_ACT_SHOT; + } else if (protocol == IPPROTO_ICMPV6) { + action = parse_icmpv6(skb_ptr, off, &pckt); + if (action >= 0) + return action; + off += IPV6_PLUS_ICMP_HDR; + } else { + memcpy(pckt.srcv6, ip6h->saddr.s6_addr32, 16); + memcpy(pckt.dstv6, ip6h->daddr.s6_addr32, 16); + } + } else { + __u8 buffer[sizeof(struct iphdr)] = {}; + + iph = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer)); + if (!iph || iph->ihl != 5) + return TC_ACT_SHOT; + + protocol = iph->protocol; + pckt.proto = protocol; + pkt_bytes = bpf_ntohs(iph->tot_len); + off += IPV4_HDR_LEN_NO_OPT; + + if (iph->frag_off & PCKT_FRAGMENTED) + return TC_ACT_SHOT; + if (protocol == IPPROTO_ICMP) { + action = parse_icmp(skb_ptr, off, &pckt); + if (action >= 0) + return action; + off += IPV4_PLUS_ICMP_HDR; + } else { + pckt.src = iph->saddr; + pckt.dst = iph->daddr; + } + } + protocol = pckt.proto; + + if (protocol == IPPROTO_TCP) { + if (!parse_tcp(skb_ptr, off, &pckt)) + return TC_ACT_SHOT; + } else if (protocol == IPPROTO_UDP) { + if (!parse_udp(skb_ptr, off, &pckt)) + return TC_ACT_SHOT; + } else { + return TC_ACT_SHOT; + } + + if (is_ipv6) + memcpy(vip.daddr.v6, pckt.dstv6, 16); + else + vip.daddr.v4 = pckt.dst; + + vip.dport = pckt.port16[1]; + vip.protocol = pckt.proto; + vip_info = bpf_map_lookup_elem(&vip_map, &vip); + if (!vip_info) { + vip.dport = 0; + vip_info = bpf_map_lookup_elem(&vip_map, &vip); + if (!vip_info) + return TC_ACT_SHOT; + pckt.port16[1] = 0; + } + + if (vip_info->flags & F_HASH_NO_SRC_PORT) + pckt.port16[0] = 0; + + if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6)) + return TC_ACT_SHOT; + + if (dst->flags & F_IPV6) { + cval = bpf_map_lookup_elem(&ctl_array, &v6_intf_pos); + if (!cval) + return TC_ACT_SHOT; + ifindex = cval->ifindex; + memcpy(tkey.remote_ipv6, dst->dstv6, 16); + tun_flag = BPF_F_TUNINFO_IPV6; + } else { + cval = bpf_map_lookup_elem(&ctl_array, &v4_intf_pos); + if (!cval) + return TC_ACT_SHOT; + ifindex = cval->ifindex; + tkey.remote_ipv4 = dst->dst; + } + vip_num = vip_info->vip_num; + data_stats = bpf_map_lookup_elem(&stats, &vip_num); + if (!data_stats) + return TC_ACT_SHOT; + data_stats->pkts++; + data_stats->bytes += pkt_bytes; + bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), tun_flag); 
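+ /* eth may point at the caller's on-stack buffer returned by
+ * bpf_dynptr_slice_rdwr(); balancer_ingress() flushes that buffer back into
+ * the packet with bpf_dynptr_write() after this function returns.
+ */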
+ *(u32 *)eth->eth_dest = tkey.remote_ipv4; + return bpf_redirect(ifindex, 0); +} + +SEC("tc") +int balancer_ingress(struct __sk_buff *ctx) +{ + __u8 buffer[sizeof(struct eth_hdr)] = {}; + struct bpf_dynptr ptr; + struct eth_hdr *eth; + __u32 eth_proto; + __u32 nh_off; + int err; + + nh_off = sizeof(struct eth_hdr); + + bpf_dynptr_from_skb(ctx, 0, &ptr); + eth = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer)); + if (!eth) + return TC_ACT_SHOT; + eth_proto = eth->eth_proto; + if (eth_proto == bpf_htons(ETH_P_IP)) + err = process_packet(&ptr, eth, nh_off, false, ctx); + else if (eth_proto == bpf_htons(ETH_P_IPV6)) + err = process_packet(&ptr, eth, nh_off, true, ctx); + else + return TC_ACT_SHOT; + + if (eth == buffer) + bpf_dynptr_write(&ptr, 0, buffer, sizeof(buffer), 0); + + return err; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c new file mode 100644 index 0000000000000..79bab9b50e9e5 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* This parsing logic is taken from the open source library katran, a layer 4 + * load balancer. + * + * This code logic using dynptrs can be found in test_parse_tcp_hdr_opt_dynptr.c + * + * https://github.com/facebookincubator/katran/blob/main/katran/lib/bpf/pckt_parsing.h + */ + +#include +#include +#include +#include +#include +#include +#include "test_tcp_hdr_options.h" + +char _license[] SEC("license") = "GPL"; + +/* Kind number used for experiments */ +const __u32 tcp_hdr_opt_kind_tpr = 0xFD; +/* Length of the tcp header option */ +const __u32 tcp_hdr_opt_len_tpr = 6; +/* maximum number of header options to check to lookup server_id */ +const __u32 tcp_hdr_opt_max_opt_checks = 15; + +__u32 server_id; + +struct hdr_opt_state { + __u32 server_id; + __u8 byte_offset; + __u8 hdr_bytes_remaining; +}; + +static int parse_hdr_opt(const struct xdp_md *xdp, struct hdr_opt_state *state) +{ + const void *data = (void *)(long)xdp->data; + const void *data_end = (void *)(long)xdp->data_end; + __u8 *tcp_opt, kind, hdr_len; + + tcp_opt = (__u8 *)(data + state->byte_offset); + if (tcp_opt + 1 > data_end) + return -1; + + kind = tcp_opt[0]; + + if (kind == TCPOPT_EOL) + return -1; + + if (kind == TCPOPT_NOP) { + state->hdr_bytes_remaining--; + state->byte_offset++; + return 0; + } + + if (state->hdr_bytes_remaining < 2 || + tcp_opt + sizeof(__u8) + sizeof(__u8) > data_end) + return -1; + + hdr_len = tcp_opt[1]; + if (hdr_len > state->hdr_bytes_remaining) + return -1; + + if (kind == tcp_hdr_opt_kind_tpr) { + if (hdr_len != tcp_hdr_opt_len_tpr) + return -1; + + if (tcp_opt + tcp_hdr_opt_len_tpr > data_end) + return -1; + + state->server_id = *(__u32 *)&tcp_opt[2]; + return 1; + } + + state->hdr_bytes_remaining -= hdr_len; + state->byte_offset += hdr_len; + return 0; +} + +SEC("xdp") +int xdp_ingress_v6(struct xdp_md *xdp) +{ + const void *data = (void *)(long)xdp->data; + const void *data_end = (void *)(long)xdp->data_end; + struct hdr_opt_state opt_state = {}; + __u8 tcp_hdr_opt_len = 0; + struct tcphdr *tcp_hdr; + __u64 tcp_offset = 0; + __u32 off; + int err; + + tcp_offset = sizeof(struct ethhdr) + sizeof(struct ipv6hdr); + tcp_hdr = (struct tcphdr *)(data + tcp_offset); + if (tcp_hdr + 1 > data_end) + return XDP_DROP; + + tcp_hdr_opt_len = (tcp_hdr->doff * 4) - sizeof(struct tcphdr); + if (tcp_hdr_opt_len < tcp_hdr_opt_len_tpr) + return 
XDP_DROP; + + opt_state.hdr_bytes_remaining = tcp_hdr_opt_len; + opt_state.byte_offset = sizeof(struct tcphdr) + tcp_offset; + + /* max number of bytes of options in tcp header is 40 bytes */ + for (int i = 0; i < tcp_hdr_opt_max_opt_checks; i++) { + err = parse_hdr_opt(xdp, &opt_state); + + if (err || !opt_state.hdr_bytes_remaining) + break; + } + + if (!opt_state.server_id) + return XDP_DROP; + + server_id = opt_state.server_id; + + return XDP_PASS; +} diff --git a/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c new file mode 100644 index 0000000000000..d3b319722e309 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* This logic is lifted from a real-world use case of packet parsing, used in + * the open source library katran, a layer 4 load balancer. + * + * This test demonstrates how to parse packet contents using dynptrs. The + * original code (parsing without dynptrs) can be found in test_parse_tcp_hdr_opt.c + */ + +#include +#include +#include +#include +#include +#include +#include "test_tcp_hdr_options.h" +#include "bpf_kfuncs.h" + +char _license[] SEC("license") = "GPL"; + +/* Kind number used for experiments */ +const __u32 tcp_hdr_opt_kind_tpr = 0xFD; +/* Length of the tcp header option */ +const __u32 tcp_hdr_opt_len_tpr = 6; +/* maximum number of header options to check to lookup server_id */ +const __u32 tcp_hdr_opt_max_opt_checks = 15; + +__u32 server_id; + +static int parse_hdr_opt(struct bpf_dynptr *ptr, __u32 *off, __u8 *hdr_bytes_remaining, + __u32 *server_id) +{ + __u8 *tcp_opt, kind, hdr_len; + __u8 buffer[sizeof(kind) + sizeof(hdr_len) + sizeof(*server_id)]; + __u8 *data; + + __builtin_memset(buffer, 0, sizeof(buffer)); + + data = bpf_dynptr_slice(ptr, *off, buffer, sizeof(buffer)); + if (!data) + return -1; + + kind = data[0]; + + if (kind == TCPOPT_EOL) + return -1; + + if (kind == TCPOPT_NOP) { + *off += 1; + *hdr_bytes_remaining -= 1; + return 0; + } + + if (*hdr_bytes_remaining < 2) + return -1; + + hdr_len = data[1]; + if (hdr_len > *hdr_bytes_remaining) + return -1; + + if (kind == tcp_hdr_opt_kind_tpr) { + if (hdr_len != tcp_hdr_opt_len_tpr) + return -1; + + __builtin_memcpy(server_id, (__u32 *)(data + 2), sizeof(*server_id)); + return 1; + } + + *off += hdr_len; + *hdr_bytes_remaining -= hdr_len; + return 0; +} + +SEC("xdp") +int xdp_ingress_v6(struct xdp_md *xdp) +{ + __u8 buffer[sizeof(struct tcphdr)] = {}; + __u8 hdr_bytes_remaining; + struct tcphdr *tcp_hdr; + __u8 tcp_hdr_opt_len; + int err = 0; + __u32 off; + + struct bpf_dynptr ptr; + + bpf_dynptr_from_xdp(xdp, 0, &ptr); + + off = sizeof(struct ethhdr) + sizeof(struct ipv6hdr); + + tcp_hdr = bpf_dynptr_slice(&ptr, off, buffer, sizeof(buffer)); + if (!tcp_hdr) + return XDP_DROP; + + tcp_hdr_opt_len = (tcp_hdr->doff * 4) - sizeof(struct tcphdr); + if (tcp_hdr_opt_len < tcp_hdr_opt_len_tpr) + return XDP_DROP; + + hdr_bytes_remaining = tcp_hdr_opt_len; + + off += sizeof(struct tcphdr); + + /* max number of bytes of options in tcp header is 40 bytes */ + for (int i = 0; i < tcp_hdr_opt_max_opt_checks; i++) { + err = parse_hdr_opt(&ptr, &off, &hdr_bytes_remaining, &server_id); + + if (err || !hdr_bytes_remaining) + break; + } + + if (!server_id) + return XDP_DROP; + + return XDP_PASS; +} diff --git a/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c b/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c new file mode 100644 
index 0000000000000..7521a805b5068 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c @@ -0,0 +1,257 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "test_iptunnel_common.h" +#include "bpf_kfuncs.h" + +const size_t tcphdr_sz = sizeof(struct tcphdr); +const size_t udphdr_sz = sizeof(struct udphdr); +const size_t ethhdr_sz = sizeof(struct ethhdr); +const size_t iphdr_sz = sizeof(struct iphdr); +const size_t ipv6hdr_sz = sizeof(struct ipv6hdr); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 256); + __type(key, __u32); + __type(value, __u64); +} rxcnt SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, MAX_IPTNL_ENTRIES); + __type(key, struct vip); + __type(value, struct iptnl_info); +} vip2tnl SEC(".maps"); + +static __always_inline void count_tx(__u32 protocol) +{ + __u64 *rxcnt_count; + + rxcnt_count = bpf_map_lookup_elem(&rxcnt, &protocol); + if (rxcnt_count) + *rxcnt_count += 1; +} + +static __always_inline int get_dport(void *trans_data, __u8 protocol) +{ + struct tcphdr *th; + struct udphdr *uh; + + switch (protocol) { + case IPPROTO_TCP: + th = (struct tcphdr *)trans_data; + return th->dest; + case IPPROTO_UDP: + uh = (struct udphdr *)trans_data; + return uh->dest; + default: + return 0; + } +} + +static __always_inline void set_ethhdr(struct ethhdr *new_eth, + const struct ethhdr *old_eth, + const struct iptnl_info *tnl, + __be16 h_proto) +{ + memcpy(new_eth->h_source, old_eth->h_dest, sizeof(new_eth->h_source)); + memcpy(new_eth->h_dest, tnl->dmac, sizeof(new_eth->h_dest)); + new_eth->h_proto = h_proto; +} + +static __always_inline int handle_ipv4(struct xdp_md *xdp, struct bpf_dynptr *xdp_ptr) +{ + __u8 eth_buffer[ethhdr_sz + iphdr_sz + ethhdr_sz]; + __u8 iph_buffer_tcp[iphdr_sz + tcphdr_sz]; + __u8 iph_buffer_udp[iphdr_sz + udphdr_sz]; + struct bpf_dynptr new_xdp_ptr; + struct iptnl_info *tnl; + struct ethhdr *new_eth; + struct ethhdr *old_eth; + __u32 transport_hdr_sz; + struct iphdr *iph; + __u16 *next_iph; + __u16 payload_len; + struct vip vip = {}; + int dport; + __u32 csum = 0; + int i; + + __builtin_memset(eth_buffer, 0, sizeof(eth_buffer)); + __builtin_memset(iph_buffer_tcp, 0, sizeof(iph_buffer_tcp)); + __builtin_memset(iph_buffer_udp, 0, sizeof(iph_buffer_udp)); + + if (ethhdr_sz + iphdr_sz + tcphdr_sz > xdp->data_end - xdp->data) + iph = bpf_dynptr_slice(xdp_ptr, ethhdr_sz, iph_buffer_udp, sizeof(iph_buffer_udp)); + else + iph = bpf_dynptr_slice(xdp_ptr, ethhdr_sz, iph_buffer_tcp, sizeof(iph_buffer_tcp)); + + if (!iph) + return XDP_DROP; + + dport = get_dport(iph + 1, iph->protocol); + if (dport == -1) + return XDP_DROP; + + vip.protocol = iph->protocol; + vip.family = AF_INET; + vip.daddr.v4 = iph->daddr; + vip.dport = dport; + payload_len = bpf_ntohs(iph->tot_len); + + tnl = bpf_map_lookup_elem(&vip2tnl, &vip); + /* It only does v4-in-v4 */ + if (!tnl || tnl->family != AF_INET) + return XDP_PASS; + + if (bpf_xdp_adjust_head(xdp, 0 - (int)iphdr_sz)) + return XDP_DROP; + + bpf_dynptr_from_xdp(xdp, 0, &new_xdp_ptr); + new_eth = bpf_dynptr_slice_rdwr(&new_xdp_ptr, 0, eth_buffer, sizeof(eth_buffer)); + if (!new_eth) + return XDP_DROP; + + iph = (struct iphdr *)(new_eth + 1); + old_eth = (struct ethhdr *)(iph + 1); + + set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IP)); + + if (new_eth == eth_buffer) + 
bpf_dynptr_write(&new_xdp_ptr, 0, eth_buffer, sizeof(eth_buffer), 0); + + iph->version = 4; + iph->ihl = iphdr_sz >> 2; + iph->frag_off = 0; + iph->protocol = IPPROTO_IPIP; + iph->check = 0; + iph->tos = 0; + iph->tot_len = bpf_htons(payload_len + iphdr_sz); + iph->daddr = tnl->daddr.v4; + iph->saddr = tnl->saddr.v4; + iph->ttl = 8; + + next_iph = (__u16 *)iph; + for (i = 0; i < iphdr_sz >> 1; i++) + csum += *next_iph++; + + iph->check = ~((csum & 0xffff) + (csum >> 16)); + + count_tx(vip.protocol); + + return XDP_TX; +} + +static __always_inline int handle_ipv6(struct xdp_md *xdp, struct bpf_dynptr *xdp_ptr) +{ + __u8 eth_buffer[ethhdr_sz + ipv6hdr_sz + ethhdr_sz]; + __u8 ip6h_buffer_tcp[ipv6hdr_sz + tcphdr_sz]; + __u8 ip6h_buffer_udp[ipv6hdr_sz + udphdr_sz]; + struct bpf_dynptr new_xdp_ptr; + struct iptnl_info *tnl; + struct ethhdr *new_eth; + struct ethhdr *old_eth; + __u32 transport_hdr_sz; + struct ipv6hdr *ip6h; + __u16 payload_len; + struct vip vip = {}; + int dport; + + __builtin_memset(eth_buffer, 0, sizeof(eth_buffer)); + __builtin_memset(ip6h_buffer_tcp, 0, sizeof(ip6h_buffer_tcp)); + __builtin_memset(ip6h_buffer_udp, 0, sizeof(ip6h_buffer_udp)); + + if (ethhdr_sz + iphdr_sz + tcphdr_sz > xdp->data_end - xdp->data) + ip6h = bpf_dynptr_slice(xdp_ptr, ethhdr_sz, ip6h_buffer_udp, sizeof(ip6h_buffer_udp)); + else + ip6h = bpf_dynptr_slice(xdp_ptr, ethhdr_sz, ip6h_buffer_tcp, sizeof(ip6h_buffer_tcp)); + + if (!ip6h) + return XDP_DROP; + + dport = get_dport(ip6h + 1, ip6h->nexthdr); + if (dport == -1) + return XDP_DROP; + + vip.protocol = ip6h->nexthdr; + vip.family = AF_INET6; + memcpy(vip.daddr.v6, ip6h->daddr.s6_addr32, sizeof(vip.daddr)); + vip.dport = dport; + payload_len = ip6h->payload_len; + + tnl = bpf_map_lookup_elem(&vip2tnl, &vip); + /* It only does v6-in-v6 */ + if (!tnl || tnl->family != AF_INET6) + return XDP_PASS; + + if (bpf_xdp_adjust_head(xdp, 0 - (int)ipv6hdr_sz)) + return XDP_DROP; + + bpf_dynptr_from_xdp(xdp, 0, &new_xdp_ptr); + new_eth = bpf_dynptr_slice_rdwr(&new_xdp_ptr, 0, eth_buffer, sizeof(eth_buffer)); + if (!new_eth) + return XDP_DROP; + + ip6h = (struct ipv6hdr *)(new_eth + 1); + old_eth = (struct ethhdr *)(ip6h + 1); + + set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IPV6)); + + if (new_eth == eth_buffer) + bpf_dynptr_write(&new_xdp_ptr, 0, eth_buffer, sizeof(eth_buffer), 0); + + ip6h->version = 6; + ip6h->priority = 0; + memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl)); + ip6h->payload_len = bpf_htons(bpf_ntohs(payload_len) + ipv6hdr_sz); + ip6h->nexthdr = IPPROTO_IPV6; + ip6h->hop_limit = 8; + memcpy(ip6h->saddr.s6_addr32, tnl->saddr.v6, sizeof(tnl->saddr.v6)); + memcpy(ip6h->daddr.s6_addr32, tnl->daddr.v6, sizeof(tnl->daddr.v6)); + + count_tx(vip.protocol); + + return XDP_TX; +} + +SEC("xdp") +int _xdp_tx_iptunnel(struct xdp_md *xdp) +{ + __u8 buffer[ethhdr_sz]; + struct bpf_dynptr ptr; + struct ethhdr *eth; + __u16 h_proto; + + __builtin_memset(buffer, 0, sizeof(buffer)); + + bpf_dynptr_from_xdp(xdp, 0, &ptr); + eth = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer)); + if (!eth) + return XDP_DROP; + + h_proto = eth->h_proto; + + if (h_proto == bpf_htons(ETH_P_IP)) + return handle_ipv4(xdp, &ptr); + else if (h_proto == bpf_htons(ETH_P_IPV6)) + + return handle_ipv6(xdp, &ptr); + else + return XDP_DROP; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_tcp_hdr_options.h b/tools/testing/selftests/bpf/test_tcp_hdr_options.h index 6118e3ab61fc3..56c9f8a3ad3d9 100644 --- 
a/tools/testing/selftests/bpf/test_tcp_hdr_options.h +++ b/tools/testing/selftests/bpf/test_tcp_hdr_options.h @@ -50,6 +50,7 @@ struct linum_err { #define TCPOPT_EOL 0 #define TCPOPT_NOP 1 +#define TCPOPT_MSS 2 #define TCPOPT_WINDOW 3 #define TCPOPT_EXP 254 -- GitLab From 65334e64a493c6a0976de7ad56bf8b7a9ff04b4a Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 25 Feb 2023 16:40:08 +0100 Subject: [PATCH 0095/2078] bpf: Support kptrs in percpu hashmap and percpu LRU hashmap Enable support for kptrs in percpu BPF hashmap and percpu BPF LRU hashmap by wiring up the freeing of these kptrs from percpu map elements. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20230225154010.391965-2-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/hashtab.c | 59 +++++++++++++++++++++++++++----------------- kernel/bpf/syscall.c | 2 ++ 2 files changed, 39 insertions(+), 22 deletions(-) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 5dfcb5ad0d068..653aeb481c794 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -249,7 +249,18 @@ static void htab_free_prealloced_fields(struct bpf_htab *htab) struct htab_elem *elem; elem = get_htab_elem(htab, i); - bpf_obj_free_fields(htab->map.record, elem->key + round_up(htab->map.key_size, 8)); + if (htab_is_percpu(htab)) { + void __percpu *pptr = htab_elem_get_ptr(elem, htab->map.key_size); + int cpu; + + for_each_possible_cpu(cpu) { + bpf_obj_free_fields(htab->map.record, per_cpu_ptr(pptr, cpu)); + cond_resched(); + } + } else { + bpf_obj_free_fields(htab->map.record, elem->key + round_up(htab->map.key_size, 8)); + cond_resched(); + } cond_resched(); } } @@ -759,9 +770,17 @@ static int htab_lru_map_gen_lookup(struct bpf_map *map, static void check_and_free_fields(struct bpf_htab *htab, struct htab_elem *elem) { - void *map_value = elem->key + round_up(htab->map.key_size, 8); + if (htab_is_percpu(htab)) { + void __percpu *pptr = htab_elem_get_ptr(elem, htab->map.key_size); + int cpu; - bpf_obj_free_fields(htab->map.record, map_value); + for_each_possible_cpu(cpu) + bpf_obj_free_fields(htab->map.record, per_cpu_ptr(pptr, cpu)); + } else { + void *map_value = elem->key + round_up(htab->map.key_size, 8); + + bpf_obj_free_fields(htab->map.record, map_value); + } } /* It is called from the bpf_lru_list when the LRU needs to delete @@ -858,9 +877,9 @@ find_first_elem: static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l) { + check_and_free_fields(htab, l); if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH) bpf_mem_cache_free(&htab->pcpu_ma, l->ptr_to_pptr); - check_and_free_fields(htab, l); bpf_mem_cache_free(&htab->ma, l); } @@ -918,14 +937,13 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr, { if (!onallcpus) { /* copy true value_size bytes */ - memcpy(this_cpu_ptr(pptr), value, htab->map.value_size); + copy_map_value(&htab->map, this_cpu_ptr(pptr), value); } else { u32 size = round_up(htab->map.value_size, 8); int off = 0, cpu; for_each_possible_cpu(cpu) { - bpf_long_memcpy(per_cpu_ptr(pptr, cpu), - value + off, size); + copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value + off); off += size; } } @@ -940,16 +958,14 @@ static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr, * (onallcpus=false always when coming from bpf prog). 
*/ if (!onallcpus) { - u32 size = round_up(htab->map.value_size, 8); int current_cpu = raw_smp_processor_id(); int cpu; for_each_possible_cpu(cpu) { if (cpu == current_cpu) - bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value, - size); - else - memset(per_cpu_ptr(pptr, cpu), 0, size); + copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value); + else /* Since elem is preallocated, we cannot touch special fields */ + zero_map_value(&htab->map, per_cpu_ptr(pptr, cpu)); } } else { pcpu_copy_value(htab, pptr, value, onallcpus); @@ -1575,9 +1591,8 @@ static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key, pptr = htab_elem_get_ptr(l, key_size); for_each_possible_cpu(cpu) { - bpf_long_memcpy(value + off, - per_cpu_ptr(pptr, cpu), - roundup_value_size); + copy_map_value_long(&htab->map, value + off, per_cpu_ptr(pptr, cpu)); + check_and_init_map_value(&htab->map, value + off); off += roundup_value_size; } } else { @@ -1772,8 +1787,8 @@ again_nocopy: pptr = htab_elem_get_ptr(l, map->key_size); for_each_possible_cpu(cpu) { - bpf_long_memcpy(dst_val + off, - per_cpu_ptr(pptr, cpu), size); + copy_map_value_long(&htab->map, dst_val + off, per_cpu_ptr(pptr, cpu)); + check_and_init_map_value(&htab->map, dst_val + off); off += size; } } else { @@ -2046,9 +2061,9 @@ static int __bpf_hash_map_seq_show(struct seq_file *seq, struct htab_elem *elem) roundup_value_size = round_up(map->value_size, 8); pptr = htab_elem_get_ptr(elem, map->key_size); for_each_possible_cpu(cpu) { - bpf_long_memcpy(info->percpu_value_buf + off, - per_cpu_ptr(pptr, cpu), - roundup_value_size); + copy_map_value_long(map, info->percpu_value_buf + off, + per_cpu_ptr(pptr, cpu)); + check_and_init_map_value(map, info->percpu_value_buf + off); off += roundup_value_size; } ctx.value = info->percpu_value_buf; @@ -2292,8 +2307,8 @@ int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value) */ pptr = htab_elem_get_ptr(l, map->key_size); for_each_possible_cpu(cpu) { - bpf_long_memcpy(value + off, - per_cpu_ptr(pptr, cpu), size); + copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu)); + check_and_init_map_value(map, value + off); off += size; } ret = 0; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index e3fcdc9836a6c..da117a2a83b23 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1059,7 +1059,9 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, case BPF_KPTR_UNREF: case BPF_KPTR_REF: if (map->map_type != BPF_MAP_TYPE_HASH && + map->map_type != BPF_MAP_TYPE_PERCPU_HASH && map->map_type != BPF_MAP_TYPE_LRU_HASH && + map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH && map->map_type != BPF_MAP_TYPE_ARRAY && map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY) { ret = -EOPNOTSUPP; -- GitLab From 9db44fdd8105da00669d425acab887c668df75f6 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 25 Feb 2023 16:40:09 +0100 Subject: [PATCH 0096/2078] bpf: Support kptrs in local storage maps Enable support for kptrs in local storage maps by wiring up the freeing of these kptrs from map value. Freeing of bpf_local_storage_map is only delayed in case there are special fields, therefore bpf_selem_free_* path can also only dereference smap safely in that case. This is recorded using a bool utilizing a hole in bpF_local_storage_elem. It could have been tagged in the pointer value smap using the lowest bit (since alignment > 1), but since there was already a hole I went with the simpler option. 
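(Purely as an illustration of the rejected alternative, not what this patch
implements: tagging the lowest bit of the smap pointer would look roughly like
the sketch below, relying on struct bpf_local_storage_map having alignment
greater than 1. The helper names are made up for the example.)

	/* Hypothetical low-bit tagging variant, not used by this patch. */
	#define SELEM_CAN_USE_SMAP	1UL

	static struct bpf_local_storage_map *selem_smap(unsigned long tagged)
	{
		return (struct bpf_local_storage_map *)(tagged & ~SELEM_CAN_USE_SMAP);
	}

	static bool selem_can_use_smap(unsigned long tagged)
	{
		return tagged & SELEM_CAN_USE_SMAP;
	}
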
Only the map structure freeing is delayed using RCU barriers, as the buckets aren't used when selem is being freed, so they can be freed once all readers of the bucket lists can no longer access it. Cc: Martin KaFai Lau Cc: KP Singh Cc: Paul E. McKenney Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20230225154010.391965-3-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_local_storage.h | 6 ++++ kernel/bpf/bpf_local_storage.c | 48 ++++++++++++++++++++++++++++--- kernel/bpf/syscall.c | 6 +++- kernel/bpf/verifier.c | 12 +++++--- 4 files changed, 63 insertions(+), 9 deletions(-) diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 6d37a40cd90e8..0fe92986412b7 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -74,6 +74,12 @@ struct bpf_local_storage_elem { struct hlist_node snode; /* Linked to bpf_local_storage */ struct bpf_local_storage __rcu *local_storage; struct rcu_head rcu; + bool can_use_smap; /* Is it safe to access smap in bpf_selem_free_* RCU + * callbacks? bpf_local_storage_map_free only + * executes rcu_barrier when there are special + * fields, this field remembers that to ensure we + * don't access already freed smap in sdata. + */ /* 8 bytes hole */ /* The data is stored in another cacheline to minimize * the number of cachelines access during a cache hit. diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 58da17ae51241..2bdd722fe2934 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -85,6 +85,7 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, if (selem) { if (value) copy_map_value(&smap->map, SDATA(selem)->data, value); + /* No need to call check_and_init_map_value as memory is zero init */ return selem; } @@ -113,10 +114,25 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu) struct bpf_local_storage_elem *selem; selem = container_of(rcu, struct bpf_local_storage_elem, rcu); + /* The can_use_smap bool is set whenever we need to free additional + * fields in selem data before freeing selem. bpf_local_storage_map_free + * only executes rcu_barrier to wait for RCU callbacks when it has + * special fields, hence we can only conditionally dereference smap, as + * by this time the map might have already been freed without waiting + * for our call_rcu callback if it did not have any special fields. + */ + if (selem->can_use_smap) + bpf_obj_free_fields(SDATA(selem)->smap->map.record, SDATA(selem)->data); + kfree(selem); +} + +static void bpf_selem_free_tasks_trace_rcu(struct rcu_head *rcu) +{ + /* Free directly if Tasks Trace RCU GP also implies RCU GP */ if (rcu_trace_implies_rcu_gp()) - kfree(selem); + bpf_selem_free_rcu(rcu); else - kfree_rcu(selem, rcu); + call_rcu(rcu, bpf_selem_free_rcu); } /* local_storage->lock must be held and selem->local_storage == local_storage. 
@@ -170,9 +186,9 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL); if (use_trace_rcu) - call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_rcu); + call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_tasks_trace_rcu); else - kfree_rcu(selem, rcu); + call_rcu(&selem->rcu, bpf_selem_free_rcu); return free_local_storage; } @@ -240,6 +256,11 @@ void bpf_selem_link_map(struct bpf_local_storage_map *smap, RCU_INIT_POINTER(SDATA(selem)->smap, smap); hlist_add_head_rcu(&selem->map_node, &b->list); raw_spin_unlock_irqrestore(&b->lock, flags); + + /* If our data will have special fields, smap will wait for us to use + * its record in bpf_selem_free_* RCU callbacks before freeing itself. + */ + selem->can_use_smap = !IS_ERR_OR_NULL(smap->map.record); } void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu) @@ -723,6 +744,25 @@ void bpf_local_storage_map_free(struct bpf_map *map, */ synchronize_rcu(); + /* Only delay freeing of smap, buckets are not needed anymore */ kvfree(smap->buckets); + + /* When local storage has special fields, callbacks for + * bpf_selem_free_rcu and bpf_selem_free_tasks_trace_rcu will keep using + * the map BTF record, we need to execute an RCU barrier to wait for + * them as the record will be freed right after our map_free callback. + */ + if (!IS_ERR_OR_NULL(smap->map.record)) { + rcu_barrier_tasks_trace(); + /* We cannot skip rcu_barrier() when rcu_trace_implies_rcu_gp() + * is true, because while call_rcu invocation is skipped in that + * case in bpf_selem_free_tasks_trace_rcu (and all local storage + * maps pass use_trace_rcu = true), there can be call_rcu + * callbacks based on use_trace_rcu = false in the earlier while + * ((selem = ...)) loop or from bpf_local_storage_unlink_nolock + * called from owner's free path. 
+ */ + rcu_barrier(); + } bpf_map_area_free(smap); } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index da117a2a83b23..eb50025b03c18 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1063,7 +1063,11 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, map->map_type != BPF_MAP_TYPE_LRU_HASH && map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH && map->map_type != BPF_MAP_TYPE_ARRAY && - map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY) { + map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY && + map->map_type != BPF_MAP_TYPE_SK_STORAGE && + map->map_type != BPF_MAP_TYPE_INODE_STORAGE && + map->map_type != BPF_MAP_TYPE_TASK_STORAGE && + map->map_type != BPF_MAP_TYPE_CGRP_STORAGE) { ret = -EOPNOTSUPP; goto free_map_tab; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a856896e835a2..bf580f246a013 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7222,22 +7222,26 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, break; case BPF_MAP_TYPE_SK_STORAGE: if (func_id != BPF_FUNC_sk_storage_get && - func_id != BPF_FUNC_sk_storage_delete) + func_id != BPF_FUNC_sk_storage_delete && + func_id != BPF_FUNC_kptr_xchg) goto error; break; case BPF_MAP_TYPE_INODE_STORAGE: if (func_id != BPF_FUNC_inode_storage_get && - func_id != BPF_FUNC_inode_storage_delete) + func_id != BPF_FUNC_inode_storage_delete && + func_id != BPF_FUNC_kptr_xchg) goto error; break; case BPF_MAP_TYPE_TASK_STORAGE: if (func_id != BPF_FUNC_task_storage_get && - func_id != BPF_FUNC_task_storage_delete) + func_id != BPF_FUNC_task_storage_delete && + func_id != BPF_FUNC_kptr_xchg) goto error; break; case BPF_MAP_TYPE_CGRP_STORAGE: if (func_id != BPF_FUNC_cgrp_storage_get && - func_id != BPF_FUNC_cgrp_storage_delete) + func_id != BPF_FUNC_cgrp_storage_delete && + func_id != BPF_FUNC_kptr_xchg) goto error; break; case BPF_MAP_TYPE_BLOOM_FILTER: -- GitLab From 85521e1ea4d0d7d8e62bbb0999f91e31ae421d76 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 25 Feb 2023 16:40:10 +0100 Subject: [PATCH 0097/2078] selftests/bpf: Add more tests for kptrs in maps Firstly, ensure programs successfully load when using all of the supported maps. Then, extend existing tests to test more cases at runtime. We are currently testing both the synchronous freeing of items and asynchronous destruction when map is freed, but the code needs to be adjusted a bit to be able to also accomodate support for percpu maps. We now do a delete on the item (and update for array maps which has a similar effect for kptrs) to perform a synchronous free of the kptr, and test destruction both for the synchronous and asynchronous deletion. Next time the program runs, it should observe the refcount as 1 since all existing references should have been released by then. By running the program after both possible paths freeing kptrs, we establish that they correctly release resources. Next, we augment the existing test to also test the same code path shared by all local storage maps using a task local storage map. 
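To make the shape of these tests concrete, here is a minimal, hedged sketch
(not part of the patch) of storing a referenced kptr in a task local storage
map from a BPF program. It reuses the prog_test kfuncs and the __kptr_ref
annotation that the selftests of this series rely on; the attach point, map
name and program name are illustrative only.

	#include <vmlinux.h>
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_tracing.h>

	/* Test kfuncs exported by the kernel's bpf test infrastructure. */
	extern struct prog_test_ref_kfunc *
	bpf_kfunc_call_test_acquire(unsigned long *scalar) __ksym;
	extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;

	struct map_value {
		struct prog_test_ref_kfunc __kptr_ref *ref_ptr;
	};

	struct {
		__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
		__uint(map_flags, BPF_F_NO_PREALLOC);
		__type(key, int);
		__type(value, struct map_value);
	} task_map SEC(".maps");

	SEC("lsm/inode_unlink")
	int BPF_PROG(store_task_kptr, struct inode *inode, struct dentry *victim)
	{
		struct prog_test_ref_kfunc *p, *old;
		struct task_struct *task;
		unsigned long arg = 0;
		struct map_value *v;

		task = bpf_get_current_task_btf();
		if (!task)
			return 0;

		v = bpf_task_storage_get(&task_map, task, NULL,
					 BPF_LOCAL_STORAGE_GET_F_CREATE);
		if (!v)
			return 0;

		p = bpf_kfunc_call_test_acquire(&arg);
		if (!p)
			return 0;

		/* Ownership of p moves into the map; any previously stored
		 * pointer comes back out and must be released here.
		 */
		old = bpf_kptr_xchg(&v->ref_ptr, p);
		if (old)
			bpf_kfunc_call_test_release(old);

		return 0;
	}

	char _license[] SEC("license") = "GPL";

When the map (or the owning task) goes away without the program having swapped
the pointer back out, it is exactly the freeing paths wired up in this series
that release the stored reference, which is what the refcount checks in the
tests above observe.
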
Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20230225154010.391965-4-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/map_kptr.c | 136 +++++-- tools/testing/selftests/bpf/progs/map_kptr.c | 344 +++++++++++++++--- .../selftests/bpf/progs/rcu_tasks_trace_gp.c | 36 ++ 3 files changed, 451 insertions(+), 65 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/rcu_tasks_trace_gp.c diff --git a/tools/testing/selftests/bpf/prog_tests/map_kptr.c b/tools/testing/selftests/bpf/prog_tests/map_kptr.c index 3533a4ecad018..8743df5995673 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_kptr.c +++ b/tools/testing/selftests/bpf/prog_tests/map_kptr.c @@ -4,70 +4,160 @@ #include "map_kptr.skel.h" #include "map_kptr_fail.skel.h" +#include "rcu_tasks_trace_gp.skel.h" static void test_map_kptr_success(bool test_run) { + LIBBPF_OPTS(bpf_test_run_opts, lopts); LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = &pkt_v4, .data_size_in = sizeof(pkt_v4), .repeat = 1, ); + int key = 0, ret, cpu; struct map_kptr *skel; - int key = 0, ret; - char buf[16]; + char buf[16], *pbuf; skel = map_kptr__open_and_load(); if (!ASSERT_OK_PTR(skel, "map_kptr__open_and_load")) return; - ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref), &opts); - ASSERT_OK(ret, "test_map_kptr_ref refcount"); - ASSERT_OK(opts.retval, "test_map_kptr_ref retval"); + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref1), &opts); + ASSERT_OK(ret, "test_map_kptr_ref1 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref1 retval"); ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref2), &opts); ASSERT_OK(ret, "test_map_kptr_ref2 refcount"); ASSERT_OK(opts.retval, "test_map_kptr_ref2 retval"); + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_ls_map_kptr_ref1), &lopts); + ASSERT_OK(ret, "test_ls_map_kptr_ref1 refcount"); + ASSERT_OK(lopts.retval, "test_ls_map_kptr_ref1 retval"); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_ls_map_kptr_ref2), &lopts); + ASSERT_OK(ret, "test_ls_map_kptr_ref2 refcount"); + ASSERT_OK(lopts.retval, "test_ls_map_kptr_ref2 retval"); + if (test_run) goto exit; + cpu = libbpf_num_possible_cpus(); + if (!ASSERT_GT(cpu, 0, "libbpf_num_possible_cpus")) + goto exit; + + pbuf = calloc(cpu, sizeof(buf)); + if (!ASSERT_OK_PTR(pbuf, "calloc(pbuf)")) + goto exit; + ret = bpf_map__update_elem(skel->maps.array_map, &key, sizeof(key), buf, sizeof(buf), 0); ASSERT_OK(ret, "array_map update"); - ret = bpf_map__update_elem(skel->maps.array_map, - &key, sizeof(key), buf, sizeof(buf), 0); - ASSERT_OK(ret, "array_map update2"); + skel->data->ref--; + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); + + ret = bpf_map__update_elem(skel->maps.pcpu_array_map, + &key, sizeof(key), pbuf, cpu * sizeof(buf), 0); + ASSERT_OK(ret, "pcpu_array_map update"); + skel->data->ref--; + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); - ret = bpf_map__update_elem(skel->maps.hash_map, - &key, sizeof(key), buf, sizeof(buf), 0); - ASSERT_OK(ret, "hash_map update"); ret = bpf_map__delete_elem(skel->maps.hash_map, &key, sizeof(key), 0); ASSERT_OK(ret, "hash_map delete"); + skel->data->ref--; + ret = 
bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); + + ret = bpf_map__delete_elem(skel->maps.pcpu_hash_map, &key, sizeof(key), 0); + ASSERT_OK(ret, "pcpu_hash_map delete"); + skel->data->ref--; + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); - ret = bpf_map__update_elem(skel->maps.hash_malloc_map, - &key, sizeof(key), buf, sizeof(buf), 0); - ASSERT_OK(ret, "hash_malloc_map update"); ret = bpf_map__delete_elem(skel->maps.hash_malloc_map, &key, sizeof(key), 0); ASSERT_OK(ret, "hash_malloc_map delete"); + skel->data->ref--; + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); + + ret = bpf_map__delete_elem(skel->maps.pcpu_hash_malloc_map, &key, sizeof(key), 0); + ASSERT_OK(ret, "pcpu_hash_malloc_map delete"); + skel->data->ref--; + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); - ret = bpf_map__update_elem(skel->maps.lru_hash_map, - &key, sizeof(key), buf, sizeof(buf), 0); - ASSERT_OK(ret, "lru_hash_map update"); ret = bpf_map__delete_elem(skel->maps.lru_hash_map, &key, sizeof(key), 0); ASSERT_OK(ret, "lru_hash_map delete"); + skel->data->ref--; + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); + + ret = bpf_map__delete_elem(skel->maps.lru_pcpu_hash_map, &key, sizeof(key), 0); + ASSERT_OK(ret, "lru_pcpu_hash_map delete"); + skel->data->ref--; + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_ls_map_kptr_ref_del), &lopts); + ASSERT_OK(ret, "test_ls_map_kptr_ref_del delete"); + skel->data->ref--; + ASSERT_OK(lopts.retval, "test_ls_map_kptr_ref_del retval"); + + free(pbuf); exit: map_kptr__destroy(skel); } -void test_map_kptr(void) +static int kern_sync_rcu_tasks_trace(struct rcu_tasks_trace_gp *rcu) { - if (test__start_subtest("success")) { + long gp_seq = READ_ONCE(rcu->bss->gp_seq); + LIBBPF_OPTS(bpf_test_run_opts, opts); + + if (!ASSERT_OK(bpf_prog_test_run_opts(bpf_program__fd(rcu->progs.do_call_rcu_tasks_trace), + &opts), "do_call_rcu_tasks_trace")) + return -EFAULT; + if (!ASSERT_OK(opts.retval, "opts.retval == 0")) + return -EFAULT; + while (gp_seq == READ_ONCE(rcu->bss->gp_seq)) + sched_yield(); + return 0; +} + +void serial_test_map_kptr(void) +{ + struct rcu_tasks_trace_gp *skel; + + RUN_TESTS(map_kptr_fail); + + skel = rcu_tasks_trace_gp__open_and_load(); + if (!ASSERT_OK_PTR(skel, "rcu_tasks_trace_gp__open_and_load")) + return; + if (!ASSERT_OK(rcu_tasks_trace_gp__attach(skel), "rcu_tasks_trace_gp__attach")) + goto end; + + if (test__start_subtest("success-map")) { + test_map_kptr_success(true); + + ASSERT_OK(kern_sync_rcu_tasks_trace(skel), "sync rcu_tasks_trace"); + ASSERT_OK(kern_sync_rcu(), "sync rcu"); + /* Observe refcount dropping to 1 on bpf_map_free_deferred */ 
test_map_kptr_success(false); - /* Do test_run twice, so that we see refcount going back to 1 - * after we leave it in map from first iteration. - */ + + ASSERT_OK(kern_sync_rcu_tasks_trace(skel), "sync rcu_tasks_trace"); + ASSERT_OK(kern_sync_rcu(), "sync rcu"); + /* Observe refcount dropping to 1 on synchronous delete elem */ test_map_kptr_success(true); } - RUN_TESTS(map_kptr_fail); +end: + rcu_tasks_trace_gp__destroy(skel); + return; } diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c index 228ec45365a8d..a24d17bc17eb7 100644 --- a/tools/testing/selftests/bpf/progs/map_kptr.c +++ b/tools/testing/selftests/bpf/progs/map_kptr.c @@ -15,6 +15,13 @@ struct array_map { __uint(max_entries, 1); } array_map SEC(".maps"); +struct pcpu_array_map { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +} pcpu_array_map SEC(".maps"); + struct hash_map { __uint(type, BPF_MAP_TYPE_HASH); __type(key, int); @@ -22,6 +29,13 @@ struct hash_map { __uint(max_entries, 1); } hash_map SEC(".maps"); +struct pcpu_hash_map { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +} pcpu_hash_map SEC(".maps"); + struct hash_malloc_map { __uint(type, BPF_MAP_TYPE_HASH); __type(key, int); @@ -30,6 +44,14 @@ struct hash_malloc_map { __uint(map_flags, BPF_F_NO_PREALLOC); } hash_malloc_map SEC(".maps"); +struct pcpu_hash_malloc_map { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); + __uint(map_flags, BPF_F_NO_PREALLOC); +} pcpu_hash_malloc_map SEC(".maps"); + struct lru_hash_map { __uint(type, BPF_MAP_TYPE_LRU_HASH); __type(key, int); @@ -37,6 +59,41 @@ struct lru_hash_map { __uint(max_entries, 1); } lru_hash_map SEC(".maps"); +struct lru_pcpu_hash_map { + __uint(type, BPF_MAP_TYPE_LRU_PERCPU_HASH); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +} lru_pcpu_hash_map SEC(".maps"); + +struct cgrp_ls_map { + __uint(type, BPF_MAP_TYPE_CGRP_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct map_value); +} cgrp_ls_map SEC(".maps"); + +struct task_ls_map { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct map_value); +} task_ls_map SEC(".maps"); + +struct inode_ls_map { + __uint(type, BPF_MAP_TYPE_INODE_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct map_value); +} inode_ls_map SEC(".maps"); + +struct sk_ls_map { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct map_value); +} sk_ls_map SEC(".maps"); + #define DEFINE_MAP_OF_MAP(map_type, inner_map_type, name) \ struct { \ __uint(type, map_type); \ @@ -160,6 +217,58 @@ int test_map_kptr(struct __sk_buff *ctx) return 0; } +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(test_cgrp_map_kptr, struct cgroup *cgrp, const char *path) +{ + struct map_value *v; + + v = bpf_cgrp_storage_get(&cgrp_ls_map, cgrp, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE); + if (v) + test_kptr(v); + return 0; +} + +SEC("lsm/inode_unlink") +int BPF_PROG(test_task_map_kptr, struct inode *inode, struct dentry *victim) +{ + struct task_struct *task; + struct map_value *v; + + task = bpf_get_current_task_btf(); + if (!task) + return 0; + v = bpf_task_storage_get(&task_ls_map, task, NULL, 
BPF_LOCAL_STORAGE_GET_F_CREATE); + if (v) + test_kptr(v); + return 0; +} + +SEC("lsm/inode_unlink") +int BPF_PROG(test_inode_map_kptr, struct inode *inode, struct dentry *victim) +{ + struct map_value *v; + + v = bpf_inode_storage_get(&inode_ls_map, inode, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE); + if (v) + test_kptr(v); + return 0; +} + +SEC("tc") +int test_sk_map_kptr(struct __sk_buff *ctx) +{ + struct map_value *v; + struct bpf_sock *sk; + + sk = ctx->sk; + if (!sk) + return 0; + v = bpf_sk_storage_get(&sk_ls_map, sk, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE); + if (v) + test_kptr(v); + return 0; +} + SEC("tc") int test_map_in_map_kptr(struct __sk_buff *ctx) { @@ -189,106 +298,257 @@ int test_map_in_map_kptr(struct __sk_buff *ctx) return 0; } -SEC("tc") -int test_map_kptr_ref(struct __sk_buff *ctx) +int ref = 1; + +static __always_inline +int test_map_kptr_ref_pre(struct map_value *v) { struct prog_test_ref_kfunc *p, *p_st; unsigned long arg = 0; - struct map_value *v; - int key = 0, ret; + int ret; p = bpf_kfunc_call_test_acquire(&arg); if (!p) return 1; + ref++; p_st = p->next; - if (p_st->cnt.refs.counter != 2) { + if (p_st->cnt.refs.counter != ref) { ret = 2; goto end; } - v = bpf_map_lookup_elem(&array_map, &key); - if (!v) { - ret = 3; - goto end; - } - p = bpf_kptr_xchg(&v->ref_ptr, p); if (p) { - ret = 4; + ret = 3; goto end; } - if (p_st->cnt.refs.counter != 2) - return 5; + if (p_st->cnt.refs.counter != ref) + return 4; p = bpf_kfunc_call_test_kptr_get(&v->ref_ptr, 0, 0); if (!p) - return 6; - if (p_st->cnt.refs.counter != 3) { - ret = 7; + return 5; + ref++; + if (p_st->cnt.refs.counter != ref) { + ret = 6; goto end; } bpf_kfunc_call_test_release(p); - if (p_st->cnt.refs.counter != 2) - return 8; + ref--; + if (p_st->cnt.refs.counter != ref) + return 7; p = bpf_kptr_xchg(&v->ref_ptr, NULL); if (!p) - return 9; + return 8; bpf_kfunc_call_test_release(p); - if (p_st->cnt.refs.counter != 1) - return 10; + ref--; + if (p_st->cnt.refs.counter != ref) + return 9; p = bpf_kfunc_call_test_acquire(&arg); if (!p) - return 11; + return 10; + ref++; p = bpf_kptr_xchg(&v->ref_ptr, p); if (p) { - ret = 12; + ret = 11; goto end; } - if (p_st->cnt.refs.counter != 2) - return 13; + if (p_st->cnt.refs.counter != ref) + return 12; /* Leave in map */ return 0; end: + ref--; bpf_kfunc_call_test_release(p); return ret; } -SEC("tc") -int test_map_kptr_ref2(struct __sk_buff *ctx) +static __always_inline +int test_map_kptr_ref_post(struct map_value *v) { struct prog_test_ref_kfunc *p, *p_st; - struct map_value *v; - int key = 0; - - v = bpf_map_lookup_elem(&array_map, &key); - if (!v) - return 1; p_st = v->ref_ptr; - if (!p_st || p_st->cnt.refs.counter != 2) - return 2; + if (!p_st || p_st->cnt.refs.counter != ref) + return 1; p = bpf_kptr_xchg(&v->ref_ptr, NULL); if (!p) - return 3; - if (p_st->cnt.refs.counter != 2) { + return 2; + if (p_st->cnt.refs.counter != ref) { bpf_kfunc_call_test_release(p); - return 4; + return 3; } p = bpf_kptr_xchg(&v->ref_ptr, p); if (p) { bpf_kfunc_call_test_release(p); - return 5; + return 4; } - if (p_st->cnt.refs.counter != 2) - return 6; + if (p_st->cnt.refs.counter != ref) + return 5; + + return 0; +} + +#define TEST(map) \ + v = bpf_map_lookup_elem(&map, &key); \ + if (!v) \ + return -1; \ + ret = test_map_kptr_ref_pre(v); \ + if (ret) \ + return ret; + +#define TEST_PCPU(map) \ + v = bpf_map_lookup_percpu_elem(&map, &key, 0); \ + if (!v) \ + return -1; \ + ret = test_map_kptr_ref_pre(v); \ + if (ret) \ + return ret; + +SEC("tc") +int test_map_kptr_ref1(struct 
__sk_buff *ctx) +{ + struct map_value *v, val = {}; + int key = 0, ret; + + bpf_map_update_elem(&hash_map, &key, &val, 0); + bpf_map_update_elem(&hash_malloc_map, &key, &val, 0); + bpf_map_update_elem(&lru_hash_map, &key, &val, 0); + + bpf_map_update_elem(&pcpu_hash_map, &key, &val, 0); + bpf_map_update_elem(&pcpu_hash_malloc_map, &key, &val, 0); + bpf_map_update_elem(&lru_pcpu_hash_map, &key, &val, 0); + + TEST(array_map); + TEST(hash_map); + TEST(hash_malloc_map); + TEST(lru_hash_map); + + TEST_PCPU(pcpu_array_map); + TEST_PCPU(pcpu_hash_map); + TEST_PCPU(pcpu_hash_malloc_map); + TEST_PCPU(lru_pcpu_hash_map); + + return 0; +} + +#undef TEST +#undef TEST_PCPU + +#define TEST(map) \ + v = bpf_map_lookup_elem(&map, &key); \ + if (!v) \ + return -1; \ + ret = test_map_kptr_ref_post(v); \ + if (ret) \ + return ret; + +#define TEST_PCPU(map) \ + v = bpf_map_lookup_percpu_elem(&map, &key, 0); \ + if (!v) \ + return -1; \ + ret = test_map_kptr_ref_post(v); \ + if (ret) \ + return ret; + +SEC("tc") +int test_map_kptr_ref2(struct __sk_buff *ctx) +{ + struct map_value *v; + int key = 0, ret; + + TEST(array_map); + TEST(hash_map); + TEST(hash_malloc_map); + TEST(lru_hash_map); + + TEST_PCPU(pcpu_array_map); + TEST_PCPU(pcpu_hash_map); + TEST_PCPU(pcpu_hash_malloc_map); + TEST_PCPU(lru_pcpu_hash_map); return 0; } +#undef TEST +#undef TEST_PCPU + +SEC("tc") +int test_map_kptr_ref3(struct __sk_buff *ctx) +{ + struct prog_test_ref_kfunc *p; + unsigned long sp = 0; + + p = bpf_kfunc_call_test_acquire(&sp); + if (!p) + return 1; + ref++; + if (p->cnt.refs.counter != ref) { + bpf_kfunc_call_test_release(p); + return 2; + } + bpf_kfunc_call_test_release(p); + ref--; + return 0; +} + +SEC("syscall") +int test_ls_map_kptr_ref1(void *ctx) +{ + struct task_struct *current; + struct map_value *v; + int ret; + + current = bpf_get_current_task_btf(); + if (!current) + return 100; + v = bpf_task_storage_get(&task_ls_map, current, NULL, 0); + if (v) + return 150; + v = bpf_task_storage_get(&task_ls_map, current, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!v) + return 200; + return test_map_kptr_ref_pre(v); +} + +SEC("syscall") +int test_ls_map_kptr_ref2(void *ctx) +{ + struct task_struct *current; + struct map_value *v; + int ret; + + current = bpf_get_current_task_btf(); + if (!current) + return 100; + v = bpf_task_storage_get(&task_ls_map, current, NULL, 0); + if (!v) + return 200; + return test_map_kptr_ref_post(v); +} + +SEC("syscall") +int test_ls_map_kptr_ref_del(void *ctx) +{ + struct task_struct *current; + struct map_value *v; + int ret; + + current = bpf_get_current_task_btf(); + if (!current) + return 100; + v = bpf_task_storage_get(&task_ls_map, current, NULL, 0); + if (!v) + return 200; + if (!v->ref_ptr) + return 300; + return bpf_task_storage_delete(&task_ls_map, current); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/rcu_tasks_trace_gp.c b/tools/testing/selftests/bpf/progs/rcu_tasks_trace_gp.c new file mode 100644 index 0000000000000..df4873558634a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/rcu_tasks_trace_gp.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +struct task_ls_map { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, int); +} task_ls_map SEC(".maps"); + +long gp_seq; + +SEC("syscall") +int do_call_rcu_tasks_trace(void *ctx) +{ + struct task_struct *current; + int *v; + + current = bpf_get_current_task_btf(); + v = 
bpf_task_storage_get(&task_ls_map, current, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!v) + return 1; + /* Invoke call_rcu_tasks_trace */ + return bpf_task_storage_delete(&task_ls_map, current); +} + +SEC("kprobe/rcu_tasks_trace_postgp") +int rcu_tasks_trace_postgp(void *ctx) +{ + __sync_add_and_fetch(&gp_seq, 1); + return 0; +} + +char _license[] SEC("license") = "GPL"; -- GitLab From be35f4af719c94df137cd611bf497d658eb3adc2 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 28 Feb 2023 20:03:01 +0800 Subject: [PATCH 0098/2078] selftests/bpf: Set __BITS_PER_LONG if target is bpf for LoongArch If target is bpf, there is no __loongarch__ definition, __BITS_PER_LONG defaults to 32, __NR_nanosleep is not defined: #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_nanosleep 101 __SC_3264(__NR_nanosleep, sys_nanosleep_time32, sys_nanosleep) #endif Work around this problem, by explicitly setting __BITS_PER_LONG to __loongarch_grlen which is defined by compiler as 64 for LA64. This is similar with commit 36e70b9b06bf ("selftests, bpf: Fix broken riscv build"). Signed-off-by: Tiezhu Yang Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/1677585781-21628-1-git-send-email-yangtiezhu@loongson.cn --- tools/testing/selftests/bpf/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index b677dcd0b77af..f40606a85a0f0 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -338,7 +338,8 @@ $(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids \ define get_sys_includes $(shell $(1) $(2) -v -E - &1 \ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \ -$(shell $(1) $(2) -dM -E - Date: Wed, 1 Mar 2023 09:53:53 +0100 Subject: [PATCH 0099/2078] libbpf: Remove unnecessary ternary operator Coverity reports that the first check of 'err' in bpf_object__init_maps is always false as 'err' is initialized to 0 at that point. Remove the unnecessary ternary operator. Signed-off-by: Viktor Malik Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/78a3702f2ea9f32a84faaae9b674c56269d330a7.1677658777.git.vmalik@redhat.com --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 05c4db355f281..905193d988852 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2615,7 +2615,7 @@ static int bpf_object__init_maps(struct bpf_object *obj, strict = !OPTS_GET(opts, relaxed_maps, false); pin_root_path = OPTS_GET(opts, pin_root_path, NULL); - err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path); + err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path); err = err ?: bpf_object__init_global_data_maps(obj); err = err ?: bpf_object__init_kconfig_map(obj); err = err ?: bpf_object__init_struct_ops_maps(obj); -- GitLab From 7832d06bd9f9080d9ceae388bb81bf33adce3850 Mon Sep 17 00:00:00 2001 From: Viktor Malik Date: Wed, 1 Mar 2023 09:53:54 +0100 Subject: [PATCH 0100/2078] libbpf: Remove several dead assignments Clang Static Analyzer (scan-build) reports several dead assignments in libbpf where the assigned value is unconditionally overridden by another value before it is read. Remove these assignments. 
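For illustration, a dead assignment of the kind scan-build flags looks
roughly like this (hypothetical snippet, not taken from libbpf; the real
instances are in the diff below):

    int err = 0;                /* dead store: overwritten before it is ever read */

    err = parse_section(obj);   /* hypothetical call; the initial 0 was never used */
    if (err)
        return err;

Dropping the initial assignment does not change behavior, it only makes
the data flow explicit and silences the analyzer.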
Signed-off-by: Viktor Malik Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/5503d18966583e55158471ebbb2f67374b11bf5e.1677658777.git.vmalik@redhat.com --- tools/lib/bpf/btf.c | 2 -- tools/lib/bpf/libbpf.c | 1 - tools/lib/bpf/relo_core.c | 3 --- 3 files changed, 6 deletions(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 9181d36118d28..0a2c079244b6e 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1000,8 +1000,6 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, } } - err = 0; - if (!btf_data) { pr_warn("failed to find '%s' ELF section in %s\n", BTF_ELF_SEC, path); err = -ENODATA; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 905193d988852..ba9e7e2b7951d 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -798,7 +798,6 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, progs = obj->programs; nr_progs = obj->nr_programs; nr_syms = symbols->d_size / sizeof(Elf64_Sym); - sec_off = 0; for (i = 0; i < nr_syms; i++) { sym = elf_sym_by_idx(obj, i); diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c index c4b0e81ae2931..a26b2f5fa0fc9 100644 --- a/tools/lib/bpf/relo_core.c +++ b/tools/lib/bpf/relo_core.c @@ -1551,9 +1551,6 @@ int __bpf_core_types_match(const struct btf *local_btf, __u32 local_id, const st if (level <= 0) return -EINVAL; - local_t = btf_type_by_id(local_btf, local_id); - targ_t = btf_type_by_id(targ_btf, targ_id); - recur: depth--; if (depth < 0) -- GitLab From 4672129127eed0d16ab1b4b70b4e49014e49e8bb Mon Sep 17 00:00:00 2001 From: Viktor Malik Date: Wed, 1 Mar 2023 09:53:55 +0100 Subject: [PATCH 0101/2078] libbpf: Cleanup linker_append_elf_relos Clang Static Analyser (scan-build) reports some unused symbols and dead assignments in the linker_append_elf_relos function. Clean these up. 
Signed-off-by: Viktor Malik Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/c5c8fe9f411b69afada8399d23bb048ef2a70535.1677658777.git.vmalik@redhat.com --- tools/lib/bpf/linker.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 4ac02c28e152a..d7069780984a3 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -1997,7 +1997,6 @@ add_sym: static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *obj) { struct src_sec *src_symtab = &obj->secs[obj->symtab_sec_idx]; - struct dst_sec *dst_symtab; int i, err; for (i = 1; i < obj->sec_cnt; i++) { @@ -2030,9 +2029,6 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob return -1; } - /* add_dst_sec() above could have invalidated linker->secs */ - dst_symtab = &linker->secs[linker->symtab_sec_idx]; - /* shdr->sh_link points to SYMTAB */ dst_sec->shdr->sh_link = linker->symtab_sec_idx; @@ -2049,16 +2045,13 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob dst_rel = dst_sec->raw_data + src_sec->dst_off; n = src_sec->shdr->sh_size / src_sec->shdr->sh_entsize; for (j = 0; j < n; j++, src_rel++, dst_rel++) { - size_t src_sym_idx = ELF64_R_SYM(src_rel->r_info); - size_t sym_type = ELF64_R_TYPE(src_rel->r_info); - Elf64_Sym *src_sym, *dst_sym; - size_t dst_sym_idx; + size_t src_sym_idx, dst_sym_idx, sym_type; + Elf64_Sym *src_sym; src_sym_idx = ELF64_R_SYM(src_rel->r_info); src_sym = src_symtab->data->d_buf + sizeof(*src_sym) * src_sym_idx; dst_sym_idx = obj->sym_map[src_sym_idx]; - dst_sym = dst_symtab->raw_data + sizeof(*dst_sym) * dst_sym_idx; dst_rel->r_offset += src_linked_sec->dst_off; sym_type = ELF64_R_TYPE(src_rel->r_info); dst_rel->r_info = ELF64_R_INFO(dst_sym_idx, sym_type); -- GitLab From 35cbf7f9156893f55b885f6fc678b2ab8b0d8918 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 1 Mar 2023 19:54:17 +0200 Subject: [PATCH 0102/2078] selftests/bpf: Support custom per-test flags and multiple expected messages Extend __flag attribute by allowing to specify one of the following: * BPF_F_STRICT_ALIGNMENT * BPF_F_ANY_ALIGNMENT * BPF_F_TEST_RND_HI32 * BPF_F_TEST_STATE_FREQ * BPF_F_SLEEPABLE * BPF_F_XDP_HAS_FRAGS * Some numeric value Extend __msg attribute by allowing to specify multiple exepcted messages. All messages are expected to be present in the verifier log in the order of application. Signed-off-by: Andrii Nakryiko Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230301175417.3146070-2-eddyz87@gmail.com [ Eduard: added commit message, formatting, comments ] --- tools/testing/selftests/bpf/progs/bpf_misc.h | 23 +++++++ tools/testing/selftests/bpf/test_loader.c | 69 +++++++++++++++++--- tools/testing/selftests/bpf/test_progs.h | 1 + 3 files changed, 84 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index 14e28f991451a..f704885aa5342 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -2,10 +2,33 @@ #ifndef __BPF_MISC_H__ #define __BPF_MISC_H__ +/* This set of attributes controls behavior of the + * test_loader.c:test_loader__run_subtests(). + * + * __msg Message expected to be found in the verifier log. + * Multiple __msg attributes could be specified. + * + * __success Expect program load success in privileged mode. 
+ * + * __failure Expect program load failure in privileged mode. + * + * __log_level Log level to use for the program, numeric value expected. + * + * __flag Adds one flag use for the program, the following values are valid: + * - BPF_F_STRICT_ALIGNMENT; + * - BPF_F_TEST_RND_HI32; + * - BPF_F_TEST_STATE_FREQ; + * - BPF_F_SLEEPABLE; + * - BPF_F_XDP_HAS_FRAGS; + * - A numeric value. + * Multiple __flag attributes could be specified, the final flags + * value is derived by applying binary "or" to all specified values. + */ #define __msg(msg) __attribute__((btf_decl_tag("comment:test_expect_msg=" msg))) #define __failure __attribute__((btf_decl_tag("comment:test_expect_failure"))) #define __success __attribute__((btf_decl_tag("comment:test_expect_success"))) #define __log_level(lvl) __attribute__((btf_decl_tag("comment:test_log_level="#lvl))) +#define __flag(flag) __attribute__((btf_decl_tag("comment:test_prog_flags="#flag))) /* Convenience macro for use with 'asm volatile' blocks */ #define __naked __attribute__((naked)) diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index 679efb3aa785e..bf41390157bf3 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -13,12 +13,15 @@ #define TEST_TAG_EXPECT_SUCCESS "comment:test_expect_success" #define TEST_TAG_EXPECT_MSG_PFX "comment:test_expect_msg=" #define TEST_TAG_LOG_LEVEL_PFX "comment:test_log_level=" +#define TEST_TAG_PROG_FLAGS_PFX "comment:test_prog_flags=" struct test_spec { const char *name; bool expect_failure; - const char *expect_msg; + const char **expect_msgs; + size_t expect_msg_cnt; int log_level; + int prog_flags; }; static int tester_init(struct test_loader *tester) @@ -67,7 +70,8 @@ static int parse_test_spec(struct test_loader *tester, for (i = 1; i < btf__type_cnt(btf); i++) { const struct btf_type *t; - const char *s; + const char *s, *val; + char *e; t = btf__type_by_id(btf, i); if (!btf_is_decl_tag(t)) @@ -82,14 +86,48 @@ static int parse_test_spec(struct test_loader *tester, } else if (strcmp(s, TEST_TAG_EXPECT_SUCCESS) == 0) { spec->expect_failure = false; } else if (str_has_pfx(s, TEST_TAG_EXPECT_MSG_PFX)) { - spec->expect_msg = s + sizeof(TEST_TAG_EXPECT_MSG_PFX) - 1; + void *tmp; + const char **msg; + + tmp = realloc(spec->expect_msgs, + (1 + spec->expect_msg_cnt) * sizeof(void *)); + if (!tmp) { + ASSERT_FAIL("failed to realloc memory for messages\n"); + return -ENOMEM; + } + spec->expect_msgs = tmp; + msg = &spec->expect_msgs[spec->expect_msg_cnt++]; + *msg = s + sizeof(TEST_TAG_EXPECT_MSG_PFX) - 1; } else if (str_has_pfx(s, TEST_TAG_LOG_LEVEL_PFX)) { + val = s + sizeof(TEST_TAG_LOG_LEVEL_PFX) - 1; errno = 0; - spec->log_level = strtol(s + sizeof(TEST_TAG_LOG_LEVEL_PFX) - 1, NULL, 0); - if (errno) { + spec->log_level = strtol(val, &e, 0); + if (errno || e[0] != '\0') { ASSERT_FAIL("failed to parse test log level from '%s'", s); return -EINVAL; } + } else if (str_has_pfx(s, TEST_TAG_PROG_FLAGS_PFX)) { + val = s + sizeof(TEST_TAG_PROG_FLAGS_PFX) - 1; + if (strcmp(val, "BPF_F_STRICT_ALIGNMENT") == 0) { + spec->prog_flags |= BPF_F_STRICT_ALIGNMENT; + } else if (strcmp(val, "BPF_F_ANY_ALIGNMENT") == 0) { + spec->prog_flags |= BPF_F_ANY_ALIGNMENT; + } else if (strcmp(val, "BPF_F_TEST_RND_HI32") == 0) { + spec->prog_flags |= BPF_F_TEST_RND_HI32; + } else if (strcmp(val, "BPF_F_TEST_STATE_FREQ") == 0) { + spec->prog_flags |= BPF_F_TEST_STATE_FREQ; + } else if (strcmp(val, "BPF_F_SLEEPABLE") == 0) { + spec->prog_flags |= 
BPF_F_SLEEPABLE; + } else if (strcmp(val, "BPF_F_XDP_HAS_FRAGS") == 0) { + spec->prog_flags |= BPF_F_XDP_HAS_FRAGS; + } else /* assume numeric value */ { + errno = 0; + spec->prog_flags |= strtol(val, &e, 0); + if (errno || e[0] != '\0') { + ASSERT_FAIL("failed to parse test prog flags from '%s'", s); + return -EINVAL; + } + } } } @@ -101,7 +139,7 @@ static void prepare_case(struct test_loader *tester, struct bpf_object *obj, struct bpf_program *prog) { - int min_log_level = 0; + int min_log_level = 0, prog_flags; if (env.verbosity > VERBOSE_NONE) min_log_level = 1; @@ -119,7 +157,11 @@ static void prepare_case(struct test_loader *tester, else bpf_program__set_log_level(prog, spec->log_level); + prog_flags = bpf_program__flags(prog); + bpf_program__set_flags(prog, prog_flags | spec->prog_flags); + tester->log_buf[0] = '\0'; + tester->next_match_pos = 0; } static void emit_verifier_log(const char *log_buf, bool force) @@ -135,17 +177,26 @@ static void validate_case(struct test_loader *tester, struct bpf_program *prog, int load_err) { - if (spec->expect_msg) { + int i, j; + + for (i = 0; i < spec->expect_msg_cnt; i++) { char *match; + const char *expect_msg; + + expect_msg = spec->expect_msgs[i]; - match = strstr(tester->log_buf, spec->expect_msg); + match = strstr(tester->log_buf + tester->next_match_pos, expect_msg); if (!ASSERT_OK_PTR(match, "expect_msg")) { /* if we are in verbose mode, we've already emitted log */ if (env.verbosity == VERBOSE_NONE) emit_verifier_log(tester->log_buf, true /*force*/); - fprintf(stderr, "EXPECTED MSG: '%s'\n", spec->expect_msg); + for (j = 0; j < i; j++) + fprintf(stderr, "MATCHED MSG: '%s'\n", spec->expect_msgs[j]); + fprintf(stderr, "EXPECTED MSG: '%s'\n", expect_msg); return; } + + tester->next_match_pos = match - tester->log_buf + strlen(expect_msg); } } diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 9fbdc57c5b570..3cbf005747ed7 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -427,6 +427,7 @@ int get_bpf_max_tramp_links(void); struct test_loader { char *log_buf; size_t log_buf_sz; + size_t next_match_pos; struct bpf_object *obj; }; -- GitLab From 7ce60b110eece1d7b3d5c322fd11f6d41a29d17b Mon Sep 17 00:00:00 2001 From: David Vernet Date: Wed, 1 Mar 2023 13:49:09 -0600 Subject: [PATCH 0103/2078] bpf: Fix doxygen comments for dynptr slice kfuncs In commit 66e3a13e7c2c ("bpf: Add bpf_dynptr_slice and bpf_dynptr_slice_rdwr"), the bpf_dynptr_slice() and bpf_dynptr_slice_rdwr() kfuncs were added to BPF. These kfuncs included doxygen headers, but unfortunately those headers are not properly formatted according to [0], and causes the following warnings during the docs build: ./kernel/bpf/helpers.c:2225: warning: \ Excess function parameter 'returns' description in 'bpf_dynptr_slice' ./kernel/bpf/helpers.c:2303: warning: \ Excess function parameter 'returns' description in 'bpf_dynptr_slice_rdwr' ... This patch fixes those doxygen comments. 
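For reference, the kernel-doc layout that the fix switches to lists the
parameters right after the one-line summary and documents the return
value in a "Return:" section rather than via "@returns". An illustrative
skeleton (not the actual helper):

    /**
     * foo_slice() - One-line summary of the function.
     * @ptr: Description of the first parameter.
     * @len: Description of the second parameter.
     *
     * Free-form description of the behavior goes here.
     *
     * Return: NULL on failure, otherwise a pointer to the requested data.
     */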
[0]: https://docs.kernel.org/doc-guide/kernel-doc.html#function-documentation Fixes: 66e3a13e7c2c ("bpf: Add bpf_dynptr_slice and bpf_dynptr_slice_rdwr") Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20230301194910.602738-1-void@manifault.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 648b29e78b844..58431a92bb65e 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2194,7 +2194,12 @@ __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid) } /** - * bpf_dynptr_slice - Obtain a read-only pointer to the dynptr data. + * bpf_dynptr_slice() - Obtain a read-only pointer to the dynptr data. + * @ptr: The dynptr whose data slice to retrieve + * @offset: Offset into the dynptr + * @buffer: User-provided buffer to copy contents into + * @buffer__szk: Size (in bytes) of the buffer. This is the length of the + * requested slice. This must be a constant. * * For non-skb and non-xdp type dynptrs, there is no difference between * bpf_dynptr_slice and bpf_dynptr_data. @@ -2209,13 +2214,7 @@ __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid) * bpf_dynptr_slice will not invalidate any ctx->data/data_end pointers in * the bpf program. * - * @ptr: The dynptr whose data slice to retrieve - * @offset: Offset into the dynptr - * @buffer: User-provided buffer to copy contents into - * @buffer__szk: Size (in bytes) of the buffer. This is the length of the - * requested slice. This must be a constant. - * - * @returns: NULL if the call failed (eg invalid dynptr), pointer to a read-only + * Return: NULL if the call failed (eg invalid dynptr), pointer to a read-only * data slice (can be either direct pointer to the data or a pointer to the user * provided buffer, with its contents containing the data, if unable to obtain * direct pointer) @@ -2258,7 +2257,12 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset } /** - * bpf_dynptr_slice_rdwr - Obtain a writable pointer to the dynptr data. + * bpf_dynptr_slice_rdwr() - Obtain a writable pointer to the dynptr data. + * @ptr: The dynptr whose data slice to retrieve + * @offset: Offset into the dynptr + * @buffer: User-provided buffer to copy contents into + * @buffer__szk: Size (in bytes) of the buffer. This is the length of the + * requested slice. This must be a constant. * * For non-skb and non-xdp type dynptrs, there is no difference between * bpf_dynptr_slice and bpf_dynptr_data. @@ -2287,13 +2291,7 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset * bpf_dynptr_slice_rdwr will not invalidate any ctx->data/data_end pointers in * the bpf program. * - * @ptr: The dynptr whose data slice to retrieve - * @offset: Offset into the dynptr - * @buffer: User-provided buffer to copy contents into - * @buffer__szk: Size (in bytes) of the buffer. This is the length of the - * requested slice. This must be a constant. 
- * - * @returns: NULL if the call failed (eg invalid dynptr), pointer to a + * Return: NULL if the call failed (eg invalid dynptr), pointer to a * data slice (can be either direct pointer to the data or a pointer to the user * provided buffer, with its contents containing the data, if unable to obtain * direct pointer) -- GitLab From db52b587c67f40e4bd6e8167f2334d4500617bdc Mon Sep 17 00:00:00 2001 From: David Vernet Date: Wed, 1 Mar 2023 13:49:10 -0600 Subject: [PATCH 0104/2078] bpf, docs: Fix __uninit kfunc doc section In commit d96d937d7c5c ("bpf: Add __uninit kfunc annotation"), the __uninit kfunc annotation was documented in kfuncs.rst. You have to fully underline a section in rst, or the build will issue a warning that the title underline is too short: ./Documentation/bpf/kfuncs.rst:104: WARNING: Title underline too short. 2.2.2 __uninit Annotation -------------------- This patch fixes that title underline. Fixes: d96d937d7c5c ("bpf: Add __uninit kfunc annotation") Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20230301194910.602738-2-void@manifault.com Signed-off-by: Alexei Starovoitov --- Documentation/bpf/kfuncs.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst index 9a78533d25ac7..9d85bbc3b7713 100644 --- a/Documentation/bpf/kfuncs.rst +++ b/Documentation/bpf/kfuncs.rst @@ -101,7 +101,7 @@ size parameter, and the value of the constant matters for program safety, __k suffix should be used. 2.2.2 __uninit Annotation --------------------- +------------------------- This annotation is used to indicate that the argument will be treated as uninitialized. -- GitLab From 1eebcb60633fd469ee27b0fbd7ee4f271feedeca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20M=C3=BCller?= Date: Wed, 1 Mar 2023 21:23:06 +0000 Subject: [PATCH 0105/2078] libbpf: Implement basic zip archive parsing support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change implements support for reading zip archives, including opening an archive, finding an entry based on its path and name in it, and closing it. The code was copied from https://github.com/iovisor/bcc/pull/4440, which implements similar functionality for bcc. The author confirmed that he is fine with this usage and the corresponding relicensing. I adjusted it to adhere to libbpf coding standards. Signed-off-by: Daniel Müller Signed-off-by: Andrii Nakryiko Acked-by: Michał Gregorczyk Link: https://lore.kernel.org/bpf/20230301212308.1839139-2-deso@posteo.net --- tools/lib/bpf/Build | 2 +- tools/lib/bpf/zip.c | 328 ++++++++++++++++++++++++++++++++++++++++++++ tools/lib/bpf/zip.h | 47 +++++++ 3 files changed, 376 insertions(+), 1 deletion(-) create mode 100644 tools/lib/bpf/zip.c create mode 100644 tools/lib/bpf/zip.h diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index 5a3dfb56d78fd..b8b0a6369363e 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1,4 +1,4 @@ libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \ netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \ btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \ - usdt.o + usdt.o zip.o diff --git a/tools/lib/bpf/zip.c b/tools/lib/bpf/zip.c new file mode 100644 index 0000000000000..8458c2dd0e3bc --- /dev/null +++ b/tools/lib/bpf/zip.c @@ -0,0 +1,328 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* + * Routines for dealing with .zip archives. + * + * Copyright (c) Meta Platforms, Inc. and affiliates. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include "libbpf_internal.h" +#include "zip.h" + +/* Specification of ZIP file format can be found here: + * https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT + * For a high level overview of the structure of a ZIP file see + * sections 4.3.1 - 4.3.6. + * + * Data structures appearing in ZIP files do not contain any + * padding and they might be misaligned. To allow us to safely + * operate on pointers to such structures and their members, we + * declare the types as packed. + */ + +#define END_OF_CD_RECORD_MAGIC 0x06054b50 + +/* See section 4.3.16 of the spec. */ +struct end_of_cd_record { + /* Magic value equal to END_OF_CD_RECORD_MAGIC */ + __u32 magic; + + /* Number of the file containing this structure or 0xFFFF if ZIP64 archive. + * Zip archive might span multiple files (disks). + */ + __u16 this_disk; + + /* Number of the file containing the beginning of the central directory or + * 0xFFFF if ZIP64 archive. + */ + __u16 cd_disk; + + /* Number of central directory records on this disk or 0xFFFF if ZIP64 + * archive. + */ + __u16 cd_records; + + /* Number of central directory records on all disks or 0xFFFF if ZIP64 + * archive. + */ + __u16 cd_records_total; + + /* Size of the central directory record or 0xFFFFFFFF if ZIP64 archive. */ + __u32 cd_size; + + /* Offset of the central directory from the beginning of the archive or + * 0xFFFFFFFF if ZIP64 archive. + */ + __u32 cd_offset; + + /* Length of comment data following end of central directory record. */ + __u16 comment_length; + + /* Up to 64k of arbitrary bytes. */ + /* uint8_t comment[comment_length] */ +} __attribute__((packed)); + +#define CD_FILE_HEADER_MAGIC 0x02014b50 +#define FLAG_ENCRYPTED (1 << 0) +#define FLAG_HAS_DATA_DESCRIPTOR (1 << 3) + +/* See section 4.3.12 of the spec. */ +struct cd_file_header { + /* Magic value equal to CD_FILE_HEADER_MAGIC. */ + __u32 magic; + __u16 version; + /* Minimum zip version needed to extract the file. */ + __u16 min_version; + __u16 flags; + __u16 compression; + __u16 last_modified_time; + __u16 last_modified_date; + __u32 crc; + __u32 compressed_size; + __u32 uncompressed_size; + __u16 file_name_length; + __u16 extra_field_length; + __u16 file_comment_length; + /* Number of the disk where the file starts or 0xFFFF if ZIP64 archive. */ + __u16 disk; + __u16 internal_attributes; + __u32 external_attributes; + /* Offset from the start of the disk containing the local file header to the + * start of the local file header. + */ + __u32 offset; +} __attribute__((packed)); + +#define LOCAL_FILE_HEADER_MAGIC 0x04034b50 + +/* See section 4.3.7 of the spec. */ +struct local_file_header { + /* Magic value equal to LOCAL_FILE_HEADER_MAGIC. */ + __u32 magic; + /* Minimum zip version needed to extract the file. */ + __u16 min_version; + __u16 flags; + __u16 compression; + __u16 last_modified_time; + __u16 last_modified_date; + __u32 crc; + __u32 compressed_size; + __u32 uncompressed_size; + __u16 file_name_length; + __u16 extra_field_length; +} __attribute__((packed)); + +struct zip_archive { + void *data; + __u32 size; + __u32 cd_offset; + __u32 cd_records; +}; + +static void *check_access(struct zip_archive *archive, __u32 offset, __u32 size) +{ + if (offset + size > archive->size || offset > offset + size) + return NULL; + + return archive->data + offset; +} + +/* Returns 0 on success, -EINVAL on error and -ENOTSUP if the eocd indicates the + * archive uses features which are not supported. 
+ */ +static int try_parse_end_of_cd(struct zip_archive *archive, __u32 offset) +{ + __u16 comment_length, cd_records; + struct end_of_cd_record *eocd; + __u32 cd_offset, cd_size; + + eocd = check_access(archive, offset, sizeof(*eocd)); + if (!eocd || eocd->magic != END_OF_CD_RECORD_MAGIC) + return -EINVAL; + + comment_length = eocd->comment_length; + if (offset + sizeof(*eocd) + comment_length != archive->size) + return -EINVAL; + + cd_records = eocd->cd_records; + if (eocd->this_disk != 0 || eocd->cd_disk != 0 || eocd->cd_records_total != cd_records) + /* This is a valid eocd, but we only support single-file non-ZIP64 archives. */ + return -ENOTSUP; + + cd_offset = eocd->cd_offset; + cd_size = eocd->cd_size; + if (!check_access(archive, cd_offset, cd_size)) + return -EINVAL; + + archive->cd_offset = cd_offset; + archive->cd_records = cd_records; + return 0; +} + +static int find_cd(struct zip_archive *archive) +{ + int rc = -EINVAL; + int64_t limit; + __u32 offset; + + if (archive->size <= sizeof(struct end_of_cd_record)) + return -EINVAL; + + /* Because the end of central directory ends with a variable length array of + * up to 0xFFFF bytes we can't know exactly where it starts and need to + * search for it at the end of the file, scanning the (limit, offset] range. + */ + offset = archive->size - sizeof(struct end_of_cd_record); + limit = (int64_t)offset - (1 << 16); + + for (; offset >= 0 && offset > limit && rc != 0; offset--) { + rc = try_parse_end_of_cd(archive, offset); + if (rc == -ENOTSUP) + break; + } + return rc; +} + +struct zip_archive *zip_archive_open(const char *path) +{ + struct zip_archive *archive; + int err, fd; + off_t size; + void *data; + + fd = open(path, O_RDONLY | O_CLOEXEC); + if (fd < 0) + return ERR_PTR(-errno); + + size = lseek(fd, 0, SEEK_END); + if (size == (off_t)-1 || size > UINT32_MAX) { + close(fd); + return ERR_PTR(-EINVAL); + } + + data = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); + err = -errno; + close(fd); + + if (data == MAP_FAILED) + return ERR_PTR(err); + + archive = malloc(sizeof(*archive)); + if (!archive) { + munmap(data, size); + return ERR_PTR(-ENOMEM); + }; + + archive->data = data; + archive->size = size; + + err = find_cd(archive); + if (err) { + munmap(data, size); + free(archive); + return ERR_PTR(err); + } + + return archive; +} + +void zip_archive_close(struct zip_archive *archive) +{ + munmap(archive->data, archive->size); + free(archive); +} + +static struct local_file_header *local_file_header_at_offset(struct zip_archive *archive, + __u32 offset) +{ + struct local_file_header *lfh; + + lfh = check_access(archive, offset, sizeof(*lfh)); + if (!lfh || lfh->magic != LOCAL_FILE_HEADER_MAGIC) + return NULL; + + return lfh; +} + +static int get_entry_at_offset(struct zip_archive *archive, __u32 offset, struct zip_entry *out) +{ + struct local_file_header *lfh; + __u32 compressed_size; + const char *name; + void *data; + + lfh = local_file_header_at_offset(archive, offset); + if (!lfh) + return -EINVAL; + + offset += sizeof(*lfh); + if ((lfh->flags & FLAG_ENCRYPTED) || (lfh->flags & FLAG_HAS_DATA_DESCRIPTOR)) + return -EINVAL; + + name = check_access(archive, offset, lfh->file_name_length); + if (!name) + return -EINVAL; + + offset += lfh->file_name_length; + if (!check_access(archive, offset, lfh->extra_field_length)) + return -EINVAL; + + offset += lfh->extra_field_length; + compressed_size = lfh->compressed_size; + data = check_access(archive, offset, compressed_size); + if (!data) + return -EINVAL; + + out->compression = 
lfh->compression; + out->name_length = lfh->file_name_length; + out->name = name; + out->data = data; + out->data_length = compressed_size; + out->data_offset = offset; + + return 0; +} + +int zip_archive_find_entry(struct zip_archive *archive, const char *file_name, + struct zip_entry *out) +{ + size_t file_name_length = strlen(file_name); + __u32 i, offset = archive->cd_offset; + + for (i = 0; i < archive->cd_records; ++i) { + __u16 cdfh_name_length, cdfh_flags; + struct cd_file_header *cdfh; + const char *cdfh_name; + + cdfh = check_access(archive, offset, sizeof(*cdfh)); + if (!cdfh || cdfh->magic != CD_FILE_HEADER_MAGIC) + return -EINVAL; + + offset += sizeof(*cdfh); + cdfh_name_length = cdfh->file_name_length; + cdfh_name = check_access(archive, offset, cdfh_name_length); + if (!cdfh_name) + return -EINVAL; + + cdfh_flags = cdfh->flags; + if ((cdfh_flags & FLAG_ENCRYPTED) == 0 && + (cdfh_flags & FLAG_HAS_DATA_DESCRIPTOR) == 0 && + file_name_length == cdfh_name_length && + memcmp(file_name, archive->data + offset, file_name_length) == 0) { + return get_entry_at_offset(archive, cdfh->offset, out); + } + + offset += cdfh_name_length; + offset += cdfh->extra_field_length; + offset += cdfh->file_comment_length; + } + + return -ENOENT; +} diff --git a/tools/lib/bpf/zip.h b/tools/lib/bpf/zip.h new file mode 100644 index 0000000000000..1c1bb21fba764 --- /dev/null +++ b/tools/lib/bpf/zip.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ + +#ifndef __LIBBPF_ZIP_H +#define __LIBBPF_ZIP_H + +#include + +/* Represents an open zip archive. + * Only basic ZIP files are supported, in particular the following are not + * supported: + * - encryption + * - streaming + * - multi-part ZIP files + * - ZIP64 + */ +struct zip_archive; + +/* Carries information on name, compression method, and data corresponding to a + * file in a zip archive. + */ +struct zip_entry { + /* Compression method as defined in pkzip spec. 0 means data is uncompressed. */ + __u16 compression; + + /* Non-null terminated name of the file. */ + const char *name; + /* Length of the file name. */ + __u16 name_length; + + /* Pointer to the file data. */ + const void *data; + /* Length of the file data. */ + __u32 data_length; + /* Offset of the file data within the archive. */ + __u32 data_offset; +}; + +/* Open a zip archive. Returns NULL in case of an error. */ +struct zip_archive *zip_archive_open(const char *path); + +/* Close a zip archive and release resources. */ +void zip_archive_close(struct zip_archive *archive); + +/* Look up an entry corresponding to a file in given zip archive. */ +int zip_archive_find_entry(struct zip_archive *archive, const char *name, struct zip_entry *out); + +#endif -- GitLab From 434fdcead73515a76885418ffe2d96b4f3ed0f49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20M=C3=BCller?= Date: Wed, 1 Mar 2023 21:23:07 +0000 Subject: [PATCH 0106/2078] libbpf: Introduce elf_find_func_offset_from_file() function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change splits the elf_find_func_offset() function in two: elf_find_func_offset(), which now accepts an already opened Elf object instead of a path to a file that is to be opened, as well as elf_find_func_offset_from_file(), which opens a binary based on a path and then invokes elf_find_func_offset() on the Elf object. 
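With that split, a caller that already holds an Elf handle, for example
one created from an in-memory image with elf_memory(), can resolve a
symbol offset without going through a file path. A rough sketch of such
a caller (hypothetical, not part of this patch; "libfoo.so" and
"foo_func" are made-up names):

    Elf *elf = elf_memory((void *)img, img_len);  /* img: ELF image already in memory */
    long off;

    if (!elf)
        return -LIBBPF_ERRNO__LIBELF;
    off = elf_find_func_offset(elf, "libfoo.so", "foo_func");  /* path arg only used for messages */
    elf_end(elf);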
Having this split in responsibilities will allow us to call elf_find_func_offset() from other code paths on Elf objects that did not necessarily come from a file on disk. Signed-off-by: Daniel Müller Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230301212308.1839139-3-deso@posteo.net --- tools/lib/bpf/libbpf.c | 57 ++++++++++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 21 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index ba9e7e2b7951d..1abef3dc9a6ed 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -10530,32 +10530,19 @@ static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn) return NULL; } -/* Find offset of function name in object specified by path. "name" matches - * symbol name or name@@LIB for library functions. +/* Find offset of function name in the provided ELF object. "binary_path" is + * the path to the ELF binary represented by "elf", and only used for error + * reporting matters. "name" matches symbol name or name@@LIB for library + * functions. */ -static long elf_find_func_offset(const char *binary_path, const char *name) +static long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name) { - int fd, i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB }; + int i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB }; bool is_shared_lib, is_name_qualified; - char errmsg[STRERR_BUFSIZE]; long ret = -ENOENT; size_t name_len; GElf_Ehdr ehdr; - Elf *elf; - fd = open(binary_path, O_RDONLY | O_CLOEXEC); - if (fd < 0) { - ret = -errno; - pr_warn("failed to open %s: %s\n", binary_path, - libbpf_strerror_r(ret, errmsg, sizeof(errmsg))); - return ret; - } - elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); - if (!elf) { - pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1)); - close(fd); - return -LIBBPF_ERRNO__FORMAT; - } if (!gelf_getehdr(elf, &ehdr)) { pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1)); ret = -LIBBPF_ERRNO__FORMAT; @@ -10568,7 +10555,7 @@ static long elf_find_func_offset(const char *binary_path, const char *name) /* Does name specify "@@LIB"? */ is_name_qualified = strstr(name, "@@") != NULL; - /* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if + /* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if * a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically * linked binary may not have SHT_DYMSYM, so absence of a section should not be * reported as a warning/error. @@ -10681,6 +10668,34 @@ static long elf_find_func_offset(const char *binary_path, const char *name) } } out: + return ret; +} + +/* Find offset of function name in ELF object specified by path. "name" matches + * symbol name or name@@LIB for library functions. 
+ */ +static long elf_find_func_offset_from_file(const char *binary_path, const char *name) +{ + char errmsg[STRERR_BUFSIZE]; + long ret = -ENOENT; + Elf *elf; + int fd; + + fd = open(binary_path, O_RDONLY | O_CLOEXEC); + if (fd < 0) { + ret = -errno; + pr_warn("failed to open %s: %s\n", binary_path, + libbpf_strerror_r(ret, errmsg, sizeof(errmsg))); + return ret; + } + elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); + if (!elf) { + pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1)); + close(fd); + return -LIBBPF_ERRNO__FORMAT; + } + + ret = elf_find_func_offset(elf, binary_path, name); elf_end(elf); close(fd); return ret; @@ -10804,7 +10819,7 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, if (func_name) { long sym_off; - sym_off = elf_find_func_offset(binary_path, func_name); + sym_off = elf_find_func_offset_from_file(binary_path, func_name); if (sym_off < 0) return libbpf_err_ptr(sym_off); func_offset += sym_off; -- GitLab From c44fd84507637f07ca9a7e16f83706e109f6b8ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20M=C3=BCller?= Date: Wed, 1 Mar 2023 21:23:08 +0000 Subject: [PATCH 0107/2078] libbpf: Add support for attaching uprobes to shared objects in APKs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change adds support for attaching uprobes to shared objects located in APKs, which is relevant for Android systems where various libraries may reside in APKs. To make that happen, we extend the syntax for the "binary path" argument to attach to with that supported by various Android tools: !/ For example: /system/app/test-app/test-app.apk!/lib/arm64-v8a/libc++_shared.so APKs need to be specified via full path, i.e., we do not attempt to resolve mere file names by searching system directories. We cannot currently test this functionality end-to-end in an automated fashion, because it relies on an Android system being present, but there is no support for that in CI. I have tested the functionality manually, by creating a libbpf program containing a uretprobe, attaching it to a function inside a shared object inside an APK, and verifying the sanity of the returned values. Signed-off-by: Daniel Müller Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230301212308.1839139-4-deso@posteo.net --- tools/lib/bpf/libbpf.c | 91 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 84 insertions(+), 7 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 1abef3dc9a6ed..dacaae31b76a5 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -53,6 +53,7 @@ #include "libbpf_internal.h" #include "hashmap.h" #include "bpf_gen_internal.h" +#include "zip.h" #ifndef BPF_FS_MAGIC #define BPF_FS_MAGIC 0xcafe4a11 @@ -10701,6 +10702,68 @@ static long elf_find_func_offset_from_file(const char *binary_path, const char * return ret; } +/* Find offset of function name in archive specified by path. Currently + * supported are .zip files that do not compress their contents, as used on + * Android in the form of APKs, for example. "file_name" is the name of the ELF + * file inside the archive. "func_name" matches symbol name or name@@LIB for + * library functions. 
+ * + * An overview of the APK format specifically provided here: + * https://en.wikipedia.org/w/index.php?title=Apk_(file_format)&oldid=1139099120#Package_contents + */ +static long elf_find_func_offset_from_archive(const char *archive_path, const char *file_name, + const char *func_name) +{ + struct zip_archive *archive; + struct zip_entry entry; + long ret; + Elf *elf; + + archive = zip_archive_open(archive_path); + if (IS_ERR(archive)) { + ret = PTR_ERR(archive); + pr_warn("zip: failed to open %s: %ld\n", archive_path, ret); + return ret; + } + + ret = zip_archive_find_entry(archive, file_name, &entry); + if (ret) { + pr_warn("zip: could not find archive member %s in %s: %ld\n", file_name, + archive_path, ret); + goto out; + } + pr_debug("zip: found entry for %s in %s at 0x%lx\n", file_name, archive_path, + (unsigned long)entry.data_offset); + + if (entry.compression) { + pr_warn("zip: entry %s of %s is compressed and cannot be handled\n", file_name, + archive_path); + ret = -LIBBPF_ERRNO__FORMAT; + goto out; + } + + elf = elf_memory((void *)entry.data, entry.data_length); + if (!elf) { + pr_warn("elf: could not read elf file %s from %s: %s\n", file_name, archive_path, + elf_errmsg(-1)); + ret = -LIBBPF_ERRNO__LIBELF; + goto out; + } + + ret = elf_find_func_offset(elf, file_name, func_name); + if (ret > 0) { + pr_debug("elf: symbol address match for %s of %s in %s: 0x%x + 0x%lx = 0x%lx\n", + func_name, file_name, archive_path, entry.data_offset, ret, + ret + entry.data_offset); + ret += entry.data_offset; + } + elf_end(elf); + +out: + zip_archive_close(archive); + return ret; +} + static const char *arch_specific_lib_paths(void) { /* @@ -10786,9 +10849,10 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, const char *binary_path, size_t func_offset, const struct bpf_uprobe_opts *opts) { - DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); + const char *archive_path = NULL, *archive_sep = NULL; char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL; - char full_binary_path[PATH_MAX]; + DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); + char full_path[PATH_MAX]; struct bpf_link *link; size_t ref_ctr_off; int pfd, err; @@ -10805,21 +10869,34 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, if (!binary_path) return libbpf_err_ptr(-EINVAL); - if (!strchr(binary_path, '/')) { - err = resolve_full_path(binary_path, full_binary_path, - sizeof(full_binary_path)); + /* Check if "binary_path" refers to an archive. 
*/ + archive_sep = strstr(binary_path, "!/"); + if (archive_sep) { + full_path[0] = '\0'; + libbpf_strlcpy(full_path, binary_path, + min(sizeof(full_path), (size_t)(archive_sep - binary_path + 1))); + archive_path = full_path; + binary_path = archive_sep + 2; + } else if (!strchr(binary_path, '/')) { + err = resolve_full_path(binary_path, full_path, sizeof(full_path)); if (err) { pr_warn("prog '%s': failed to resolve full path for '%s': %d\n", prog->name, binary_path, err); return libbpf_err_ptr(err); } - binary_path = full_binary_path; + binary_path = full_path; } func_name = OPTS_GET(opts, func_name, NULL); if (func_name) { long sym_off; - sym_off = elf_find_func_offset_from_file(binary_path, func_name); + if (archive_path) { + sym_off = elf_find_func_offset_from_archive(archive_path, binary_path, + func_name); + binary_path = archive_path; + } else { + sym_off = elf_find_func_offset_from_file(binary_path, func_name); + } if (sym_off < 0) return libbpf_err_ptr(sym_off); func_offset += sym_off; -- GitLab From c45eac537bd8b4977d335c123212140bc5257670 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Wed, 1 Mar 2023 21:30:14 -0800 Subject: [PATCH 0108/2078] bpf: Fix bpf_dynptr_slice{_rdwr} to return NULL instead of 0 Change bpf_dynptr_slice and bpf_dynptr_slice_rdwr to return NULL instead of 0, in accordance with the codebase guidelines. Fixes: 66e3a13e7c2c ("bpf: Add bpf_dynptr_slice and bpf_dynptr_slice_rdwr") Reported-by: kernel test robot Signed-off-by: Joanne Koong Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230302053014.1726219-1-joannelkoong@gmail.com --- kernel/bpf/helpers.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 58431a92bb65e..de9ef8476e298 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2227,11 +2227,11 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset int err; if (!ptr->data) - return 0; + return NULL; err = bpf_dynptr_check_off_len(ptr, offset, len); if (err) - return 0; + return NULL; type = bpf_dynptr_get_type(ptr); @@ -2252,7 +2252,7 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset } default: WARN_ONCE(true, "unknown dynptr type %d\n", type); - return 0; + return NULL; } } @@ -2300,7 +2300,7 @@ __bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 o void *buffer, u32 buffer__szk) { if (!ptr->data || bpf_dynptr_is_rdonly(ptr)) - return 0; + return NULL; /* bpf_dynptr_slice_rdwr is the same logic as bpf_dynptr_slice. * -- GitLab From d56b0c461d19dae917fa0bba76cbe8ad7a44712e Mon Sep 17 00:00:00 2001 From: David Vernet Date: Thu, 2 Mar 2023 12:39:17 -0600 Subject: [PATCH 0109/2078] bpf, docs: Fix link to netdev-FAQ target The BPF devel Q&A documentation page makes frequent reference to the netdev-QA page via the netdev-FAQ rst link. This link is currently broken, as is evidenced by the build output when making BPF docs: ./Documentation/bpf/bpf_devel_QA.rst:150: WARNING: undefined label: 'netdev-faq' ./Documentation/bpf/bpf_devel_QA.rst:206: WARNING: undefined label: 'netdev-faq' ./Documentation/bpf/bpf_devel_QA.rst:231: WARNING: undefined label: 'netdev-faq' ./Documentation/bpf/bpf_devel_QA.rst:396: WARNING: undefined label: 'netdev-faq' ./Documentation/bpf/bpf_devel_QA.rst:412: WARNING: undefined label: 'netdev-faq' Fix the links to point to the actual netdev-faq page. 
Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20230302183918.54190-1-void@manifault.com Signed-off-by: Alexei Starovoitov --- Documentation/bpf/bpf_devel_QA.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst index 03d4993eda6f0..5f5f9ccc3862b 100644 --- a/Documentation/bpf/bpf_devel_QA.rst +++ b/Documentation/bpf/bpf_devel_QA.rst @@ -128,7 +128,7 @@ into the bpf-next tree will make their way into net-next tree. net and net-next are both run by David S. Miller. From there, they will go into the kernel mainline tree run by Linus Torvalds. To read up on the process of net and net-next being merged into the mainline tree, see -the :ref:`netdev-FAQ` +the `netdev-FAQ`_. @@ -147,7 +147,7 @@ request):: Q: How do I indicate which tree (bpf vs. bpf-next) my patch should be applied to? --------------------------------------------------------------------------------- -A: The process is the very same as described in the :ref:`netdev-FAQ`, +A: The process is the very same as described in the `netdev-FAQ`_, so please read up on it. The subject line must indicate whether the patch is a fix or rather "next-like" content in order to let the maintainers know whether it is targeted at bpf or bpf-next. @@ -206,7 +206,7 @@ ii) run extensive BPF test suite and Once the BPF pull request was accepted by David S. Miller, then the patches end up in net or net-next tree, respectively, and make their way from there further into mainline. Again, see the -:ref:`netdev-FAQ` for additional information e.g. on how often they are +`netdev-FAQ`_ for additional information e.g. on how often they are merged to mainline. Q: How long do I need to wait for feedback on my BPF patches? @@ -230,7 +230,7 @@ Q: Are patches applied to bpf-next when the merge window is open? ----------------------------------------------------------------- A: For the time when the merge window is open, bpf-next will not be processed. This is roughly analogous to net-next patch processing, -so feel free to read up on the :ref:`netdev-FAQ` about further details. +so feel free to read up on the `netdev-FAQ`_ about further details. During those two weeks of merge window, we might ask you to resend your patch series once bpf-next is open again. Once Linus released @@ -394,7 +394,7 @@ netdev kernel mailing list in Cc and ask for the fix to be queued up: netdev@vger.kernel.org The process in general is the same as on netdev itself, see also the -:ref:`netdev-FAQ`. +`netdev-FAQ`_. Q: Do you also backport to kernels not currently maintained as stable? ---------------------------------------------------------------------- @@ -410,7 +410,7 @@ Q: The BPF patch I am about to submit needs to go to stable as well What should I do? A: The same rules apply as with netdev patch submissions in general, see -the :ref:`netdev-FAQ`. +the `netdev-FAQ`_. Never add "``Cc: stable@vger.kernel.org``" to the patch description, but ask the BPF maintainers to queue the patches instead. This can be done @@ -685,7 +685,7 @@ when: .. Links .. _Documentation/process/: https://www.kernel.org/doc/html/latest/process/ -.. _netdev-FAQ: Documentation/process/maintainer-netdev.rst +.. _netdev-FAQ: https://www.kernel.org/doc/html/latest/process/maintainer-netdev.html .. _selftests: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/ .. 
_Documentation/dev-tools/kselftest.rst: -- GitLab From cacad346f67ce9604dcc9db10f1f1769dabb3891 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Thu, 2 Mar 2023 12:39:18 -0600 Subject: [PATCH 0110/2078] bpf, docs: Fix final bpf docs build failure maps.rst in the BPF documentation links to the /userspace-api/ebpf/syscall document (Documentation/userspace-api/ebpf/syscall.rst). For some reason, if you try to reference the document with :doc:, the docs build emits the following warning: ./Documentation/bpf/maps.rst:13: WARNING: \ unknown document: '/userspace-api/ebpf/syscall' It appears that other places in the docs tree also don't support using :doc:. Elsewhere in the BPF documentation, we just reference the kernel docs page directly. Let's do that here to clean up the last remaining noise in the docs build. Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20230302183918.54190-2-void@manifault.com Signed-off-by: Alexei Starovoitov --- Documentation/bpf/maps.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Documentation/bpf/maps.rst b/Documentation/bpf/maps.rst index 4906ff0f83820..6f069f3d6f4b0 100644 --- a/Documentation/bpf/maps.rst +++ b/Documentation/bpf/maps.rst @@ -11,9 +11,9 @@ maps are accessed from BPF programs via BPF helpers which are documented in the `man-pages`_ for `bpf-helpers(7)`_. BPF maps are accessed from user space via the ``bpf`` syscall, which provides -commands to create maps, lookup elements, update elements and delete -elements. More details of the BPF syscall are available in -:doc:`/userspace-api/ebpf/syscall` and in the `man-pages`_ for `bpf(2)`_. +commands to create maps, lookup elements, update elements and delete elements. +More details of the BPF syscall are available in `ebpf-syscall`_ and in the +`man-pages`_ for `bpf(2)`_. Map Types ========= @@ -79,3 +79,4 @@ Find and delete element by key in a given map using ``attr->map_fd``, .. _man-pages: https://www.kernel.org/doc/man-pages/ .. _bpf(2): https://man7.org/linux/man-pages/man2/bpf.2.html .. _bpf-helpers(7): https://man7.org/linux/man-pages/man7/bpf-helpers.7.html +.. _ebpf-syscall: https://docs.kernel.org/userspace-api/ebpf/syscall.html -- GitLab From c501bf55c88b834adefda870c7c092ec9052a437 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 2 Mar 2023 09:42:59 -1000 Subject: [PATCH 0111/2078] bpf: Make bpf_get_current_[ancestor_]cgroup_id() available for all program types These helpers are safe to call from any context and there's no reason to restrict access to them. Remove them from bpf_trace and filter lists and add to bpf_base_func_proto() under perfmon_capable(). v2: After consulting with Andrii, relocated in bpf_base_func_proto() so that they require bpf_capable() but not perfomon_capable() as it doesn't read from or affect others on the system. 
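For illustration, a minimal sketch of what this enables (section, variable names and return value are illustrative; it assumes the program type's func_proto falls back to bpf_base_func_proto() and that the loader holds CAP_BPF):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

char LICENSE[] SEC("license") = "GPL";

__u64 last_cgroup_id;

SEC("cgroup_skb/egress")
int record_cgroup_id(struct __sk_buff *skb)
{
	/* Resolved through bpf_base_func_proto() now, so no per-program-type
	 * plumbing is needed; still gated on a bpf_capable() loader.
	 */
	last_cgroup_id = bpf_get_current_cgroup_id();
	return 1; /* allow the packet */
}
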
Signed-off-by: Tejun Heo Cc: Andrii Nakryiko Link: https://lore.kernel.org/r/ZAD8QyoszMZiTzBY@slm.duckdns.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/cgroup.c | 4 ---- kernel/bpf/helpers.c | 4 ++++ kernel/trace/bpf_trace.c | 4 ---- net/core/filter.c | 6 ------ 4 files changed, 4 insertions(+), 14 deletions(-) diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index bf2fdb33fb313..a4ae422b8f122 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -2529,10 +2529,6 @@ cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_current_pid_tgid_proto; case BPF_FUNC_get_current_comm: return &bpf_get_current_comm_proto; - case BPF_FUNC_get_current_cgroup_id: - return &bpf_get_current_cgroup_id_proto; - case BPF_FUNC_get_current_ancestor_cgroup_id: - return &bpf_get_current_ancestor_cgroup_id_proto; #ifdef CONFIG_CGROUP_NET_CLASSID case BPF_FUNC_get_cgroup_classid: return &bpf_get_cgroup_classid_curr_proto; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index de9ef8476e298..6fc0d6c44e4c9 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1748,6 +1748,10 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_cgrp_storage_get_proto; case BPF_FUNC_cgrp_storage_delete: return &bpf_cgrp_storage_delete_proto; + case BPF_FUNC_get_current_cgroup_id: + return &bpf_get_current_cgroup_id_proto; + case BPF_FUNC_get_current_ancestor_cgroup_id: + return &bpf_get_current_ancestor_cgroup_id_proto; #endif default: break; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index e8da032bb6fc8..bcf91bc7bf71f 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1453,10 +1453,6 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) NULL : &bpf_probe_read_compat_str_proto; #endif #ifdef CONFIG_CGROUPS - case BPF_FUNC_get_current_cgroup_id: - return &bpf_get_current_cgroup_id_proto; - case BPF_FUNC_get_current_ancestor_cgroup_id: - return &bpf_get_current_ancestor_cgroup_id_proto; case BPF_FUNC_cgrp_storage_get: return &bpf_cgrp_storage_get_proto; case BPF_FUNC_cgrp_storage_delete: diff --git a/net/core/filter.c b/net/core/filter.c index 8f3124e061330..a2dc44e70ea0a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -8165,12 +8165,6 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_storage_delete_proto; case BPF_FUNC_get_netns_cookie: return &bpf_get_netns_cookie_sk_msg_proto; -#ifdef CONFIG_CGROUPS - case BPF_FUNC_get_current_cgroup_id: - return &bpf_get_current_cgroup_id_proto; - case BPF_FUNC_get_current_ancestor_cgroup_id: - return &bpf_get_current_ancestor_cgroup_id_proto; -#endif #ifdef CONFIG_CGROUP_NET_CLASSID case BPF_FUNC_get_cgroup_classid: return &bpf_get_cgroup_classid_curr_proto; -- GitLab From ec97a76f113ee0d8fd17178b4e4ffbf0ab9e5452 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Thu, 2 Mar 2023 16:55:00 -0800 Subject: [PATCH 0112/2078] selftests/bpf: Add -Wuninitialized flag to bpf prog flags Per C99 standard [0], Section 6.7.8, Paragraph 10: If an object that has automatic storage duration is not initialized explicitly, its value is indeterminate. And in the same document, in appendix "J.2 Undefined behavior": The behavior is undefined in the following circumstances: [...] The value of an object with automatic storage duration is used while it is indeterminate (6.2.4, 6.7.8, 6.8). 
This means that use of an uninitialized stack variable is undefined behavior, and therefore that clang can choose to do a variety of scary things, such as not generating bytecode for "bunch of useful code" in the below example: void some_func() { int i; if (!i) return; // bunch of useful code } To add insult to injury, if some_func above is a helper function for some BPF program, clang can choose to not generate an "exit" insn, causing verifier to fail with "last insn is not an exit or jmp". Going from that verification failure to the root cause of uninitialized use is certain to be frustrating. This patch adds -Wuninitialized to the cflags for selftest BPF progs and fixes up existing instances of uninitialized use. [0]: https://www.open-std.org/jtc1/sc22/WG14/www/docs/n1256.pdf Signed-off-by: Dave Marchevsky Cc: David Vernet Cc: Tejun Heo Acked-by: David Vernet Link: https://lore.kernel.org/r/20230303005500.1614874-1-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 2 +- tools/testing/selftests/bpf/progs/rbtree.c | 2 +- tools/testing/selftests/bpf/progs/rbtree_fail.c | 7 +++++-- .../selftests/bpf/progs/test_kfunc_dynptr_param.c | 2 +- .../testing/selftests/bpf/progs/test_sk_lookup_kern.c | 2 +- tools/testing/selftests/bpf/progs/test_tunnel_kern.c | 10 +++++----- 6 files changed, 14 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index f40606a85a0f0..eab3cf5399f52 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -357,7 +357,7 @@ BPF_CFLAGS = -g -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ -I$(abspath $(OUTPUT)/../usr/include) CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \ - -Wno-compare-distinct-pointer-types + -Wno-compare-distinct-pointer-types -Wuninitialized $(OUTPUT)/test_l4lb_noinline.o: BPF_CFLAGS += -fno-inline $(OUTPUT)/test_xdp_noinline.o: BPF_CFLAGS += -fno-inline diff --git a/tools/testing/selftests/bpf/progs/rbtree.c b/tools/testing/selftests/bpf/progs/rbtree.c index e5db1a4287e50..4c90aa6abddd9 100644 --- a/tools/testing/selftests/bpf/progs/rbtree.c +++ b/tools/testing/selftests/bpf/progs/rbtree.c @@ -75,7 +75,7 @@ SEC("tc") long rbtree_add_and_remove(void *ctx) { struct bpf_rb_node *res = NULL; - struct node_data *n, *m; + struct node_data *n, *m = NULL; n = bpf_obj_new(typeof(*n)); if (!n) diff --git a/tools/testing/selftests/bpf/progs/rbtree_fail.c b/tools/testing/selftests/bpf/progs/rbtree_fail.c index bf3cba115897e..1ced900f3fced 100644 --- a/tools/testing/selftests/bpf/progs/rbtree_fail.c +++ b/tools/testing/selftests/bpf/progs/rbtree_fail.c @@ -232,8 +232,11 @@ long rbtree_api_first_release_unlock_escape(void *ctx) bpf_spin_lock(&glock); res = bpf_rbtree_first(&groot); - if (res) - n = container_of(res, struct node_data, node); + if (!res) { + bpf_spin_unlock(&glock); + return 1; + } + n = container_of(res, struct node_data, node); bpf_spin_unlock(&glock); bpf_spin_lock(&glock); diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c index 2fbef3cc7ad84..2dde8e3fe4c9a 100644 --- a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c +++ b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c @@ -48,7 +48,7 @@ SEC("?lsm.s/bpf") __failure __msg("arg#0 expected pointer to stack or dynptr_ptr") int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size) { - unsigned long val; + unsigned long val = 0; 
return bpf_verify_pkcs7_signature((struct bpf_dynptr *)val, (struct bpf_dynptr *)val, NULL); diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c index b502e5c92e338..6ccf6d546074e 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c +++ b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c @@ -23,8 +23,8 @@ static struct bpf_sock_tuple *get_tuple(void *data, __u64 nh_off, bool *ipv4) { struct bpf_sock_tuple *result; + __u64 ihl_len = 0; __u8 proto = 0; - __u64 ihl_len; if (eth_proto == bpf_htons(ETH_P_IP)) { struct iphdr *iph = (struct iphdr *)(data + nh_off); diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index 508da4a23c4f0..95b4aa0928baa 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -324,11 +324,11 @@ int ip4ip6erspan_get_tunnel(struct __sk_buff *skb) SEC("tc") int vxlan_set_tunnel_dst(struct __sk_buff *skb) { - int ret; struct bpf_tunnel_key key; struct vxlan_metadata md; __u32 index = 0; __u32 *local_ip = NULL; + int ret = 0; local_ip = bpf_map_lookup_elem(&local_ip_map, &index); if (!local_ip) { @@ -363,11 +363,11 @@ int vxlan_set_tunnel_dst(struct __sk_buff *skb) SEC("tc") int vxlan_set_tunnel_src(struct __sk_buff *skb) { - int ret; struct bpf_tunnel_key key; struct vxlan_metadata md; __u32 index = 0; __u32 *local_ip = NULL; + int ret = 0; local_ip = bpf_map_lookup_elem(&local_ip_map, &index); if (!local_ip) { @@ -494,9 +494,9 @@ SEC("tc") int ip6vxlan_set_tunnel_dst(struct __sk_buff *skb) { struct bpf_tunnel_key key; - int ret; __u32 index = 0; __u32 *local_ip; + int ret = 0; local_ip = bpf_map_lookup_elem(&local_ip_map, &index); if (!local_ip) { @@ -525,9 +525,9 @@ SEC("tc") int ip6vxlan_set_tunnel_src(struct __sk_buff *skb) { struct bpf_tunnel_key key; - int ret; __u32 index = 0; __u32 *local_ip; + int ret = 0; local_ip = bpf_map_lookup_elem(&local_ip_map, &index); if (!local_ip) { @@ -556,9 +556,9 @@ SEC("tc") int ip6vxlan_get_tunnel_src(struct __sk_buff *skb) { struct bpf_tunnel_key key; - int ret; __u32 index = 0; __u32 *local_ip; + int ret = 0; local_ip = bpf_map_lookup_elem(&local_ip_map, &index); if (!local_ip) { -- GitLab From f71f8530494bb5ab43d3369ef0ce8373eb1ee077 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Thu, 2 Mar 2023 13:46:13 +0200 Subject: [PATCH 0113/2078] bpf: Add support for absolute value BPF timers Add a new flag BPF_F_TIMER_ABS that can be passed to bpf_timer_start() to start an absolute value timer instead of the default relative value. This makes the timer expire at an exact point in time, instead of a time with latencies induced by both the BPF and timer subsystems. Suggested-by: Artem Bityutskiy Signed-off-by: Tero Kristo Link: https://lore.kernel.org/r/20230302114614.2985072-2-tero.kristo@linux.intel.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 15 +++++++++++++++ kernel/bpf/helpers.c | 11 +++++++++-- tools/include/uapi/linux/bpf.h | 15 +++++++++++++++ 3 files changed, 39 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c9699304aed2d..976b194eb7754 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4969,6 +4969,12 @@ union bpf_attr { * different maps if key/value layout matches across maps. * Every bpf_timer_set_callback() can have different callback_fn. 
* + * *flags* can be one of: + * + * **BPF_F_TIMER_ABS** + * Start the timer in absolute expire value instead of the + * default relative one. + * * Return * 0 on success. * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier @@ -7097,4 +7103,13 @@ struct bpf_core_relo { enum bpf_core_relo_kind kind; }; +/* + * Flags to control bpf_timer_start() behaviour. + * - BPF_F_TIMER_ABS: Timeout passed is absolute time, by default it is + * relative to current time. + */ +enum { + BPF_F_TIMER_ABS = (1ULL << 0), +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 6fc0d6c44e4c9..12f12e879bcf0 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1264,10 +1264,11 @@ BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, fla { struct bpf_hrtimer *t; int ret = 0; + enum hrtimer_mode mode; if (in_nmi()) return -EOPNOTSUPP; - if (flags) + if (flags > BPF_F_TIMER_ABS) return -EINVAL; __bpf_spin_lock_irqsave(&timer->lock); t = timer->timer; @@ -1275,7 +1276,13 @@ BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, fla ret = -EINVAL; goto out; } - hrtimer_start(&t->timer, ns_to_ktime(nsecs), HRTIMER_MODE_REL_SOFT); + + if (flags & BPF_F_TIMER_ABS) + mode = HRTIMER_MODE_ABS_SOFT; + else + mode = HRTIMER_MODE_REL_SOFT; + + hrtimer_start(&t->timer, ns_to_ktime(nsecs), mode); out: __bpf_spin_unlock_irqrestore(&timer->lock); return ret; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index c9699304aed2d..976b194eb7754 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4969,6 +4969,12 @@ union bpf_attr { * different maps if key/value layout matches across maps. * Every bpf_timer_set_callback() can have different callback_fn. * + * *flags* can be one of: + * + * **BPF_F_TIMER_ABS** + * Start the timer in absolute expire value instead of the + * default relative one. + * * Return * 0 on success. * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier @@ -7097,4 +7103,13 @@ struct bpf_core_relo { enum bpf_core_relo_kind kind; }; +/* + * Flags to control bpf_timer_start() behaviour. + * - BPF_F_TIMER_ABS: Timeout passed is absolute time, by default it is + * relative to current time. + */ +enum { + BPF_F_TIMER_ABS = (1ULL << 0), +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- GitLab From 944459e88b4f5c71683b56710f96e39756afae31 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Thu, 2 Mar 2023 13:46:14 +0200 Subject: [PATCH 0114/2078] selftests/bpf: Add absolute timer test Add test for the absolute BPF timer under the existing timer tests. This will run the timer two times with 1us expiration time, and then re-arm the timer at ~35s in the future. At the end, it is verified that the absolute timer expired exactly two times. 
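Distilled from the selftest that follows, the basic way a program arms an absolute timer looks like this (map, section and callback names are illustrative, not taken from the patch):

#include <linux/bpf.h>
#include <time.h>
#include <bpf/bpf_helpers.h>

char LICENSE[] SEC("license") = "GPL";

struct elem {
	struct bpf_timer t;
};

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, struct elem);
} abs_timer_map SEC(".maps");

static int timer_cb(void *map, int *key, struct bpf_timer *timer)
{
	return 0;
}

SEC("tc")
int arm_abs_timer(struct __sk_buff *skb)
{
	int key = 0;
	struct bpf_timer *timer;

	timer = bpf_map_lookup_elem(&abs_timer_map, &key);
	if (!timer)
		return 0;

	bpf_timer_init(timer, &abs_timer_map, CLOCK_BOOTTIME);
	bpf_timer_set_callback(timer, timer_cb);
	/* Fire at an absolute CLOCK_BOOTTIME instant (~1us from now)
	 * instead of a delay relative to this call.
	 */
	bpf_timer_start(timer, bpf_ktime_get_boot_ns() + 1000, BPF_F_TIMER_ABS);
	return 0;
}
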
Signed-off-by: Tero Kristo Link: https://lore.kernel.org/r/20230302114614.2985072-3-tero.kristo@linux.intel.com Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/timer.c | 3 ++ tools/testing/selftests/bpf/progs/timer.c | 45 +++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c index 7eb0492148599..290c21dbe65ab 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer.c +++ b/tools/testing/selftests/bpf/prog_tests/timer.c @@ -29,6 +29,9 @@ static int timer(struct timer *timer_skel) /* check that timer_cb2() was executed twice */ ASSERT_EQ(timer_skel->bss->bss_data, 10, "bss_data"); + /* check that timer_cb3() was executed twice */ + ASSERT_EQ(timer_skel->bss->abs_data, 12, "abs_data"); + /* check that there were no errors in timer execution */ ASSERT_EQ(timer_skel->bss->err, 0, "err"); diff --git a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c index acda5c9cea933..9a16d95213e11 100644 --- a/tools/testing/selftests/bpf/progs/timer.c +++ b/tools/testing/selftests/bpf/progs/timer.c @@ -46,7 +46,15 @@ struct { __type(value, struct elem); } lru SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} abs_timer SEC(".maps"); + __u64 bss_data; +__u64 abs_data; __u64 err; __u64 ok; __u64 callback_check = 52; @@ -284,3 +292,40 @@ int BPF_PROG2(test2, int, a, int, b) return bpf_timer_test(); } + +/* callback for absolute timer */ +static int timer_cb3(void *map, int *key, struct bpf_timer *timer) +{ + abs_data += 6; + + if (abs_data < 12) { + bpf_timer_start(timer, bpf_ktime_get_boot_ns() + 1000, + BPF_F_TIMER_ABS); + } else { + /* Re-arm timer ~35 seconds in future */ + bpf_timer_start(timer, bpf_ktime_get_boot_ns() + (1ull << 35), + BPF_F_TIMER_ABS); + } + + return 0; +} + +SEC("fentry/bpf_fentry_test3") +int BPF_PROG2(test3, int, a) +{ + int key = 0; + struct bpf_timer *timer; + + bpf_printk("test3"); + + timer = bpf_map_lookup_elem(&abs_timer, &key); + if (timer) { + if (bpf_timer_init(timer, &abs_timer, CLOCK_BOOTTIME) != 0) + err |= 2048; + bpf_timer_set_callback(timer, timer_cb3); + bpf_timer_start(timer, bpf_ktime_get_boot_ns() + 1000, + BPF_F_TIMER_ABS); + } + + return 0; +} -- GitLab From 03b77e17aeb22a5935ea20d585ca6a1f2947e62b Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 2 Mar 2023 20:14:41 -0800 Subject: [PATCH 0115/2078] bpf: Rename __kptr_ref -> __kptr and __kptr -> __kptr_untrusted. __kptr meant to store PTR_UNTRUSTED kernel pointers inside bpf maps. The concept felt useful, but didn't get much traction, since bpf_rdonly_cast() was added soon after and bpf programs received a simpler way to access PTR_UNTRUSTED kernel pointers without going through restrictive __kptr usage. Rename __kptr_ref -> __kptr and __kptr -> __kptr_untrusted to indicate its intended usage. The main goal of __kptr_untrusted was to read/write such pointers directly while bpf_kptr_xchg was a mechanism to access refcnted kernel pointers. The next patch will allow RCU protected __kptr access with direct read. At that point __kptr_untrusted will be deprecated. 
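Concretely, after the rename a map value carrying both flavors of kernel pointer is declared and used roughly as follows (map, field and hook names are illustrative, not taken from this patch):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";

void bpf_task_release(struct task_struct *p) __ksym;

struct map_value {
	/* plain load/store, may dangle, never usable as a refcounted object */
	struct task_struct __kptr_untrusted *any_task;
	/* refcounted; moved in and out only via bpf_kptr_xchg() */
	struct task_struct __kptr *owned_task;
};

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, struct map_value);
} tasks SEC(".maps");

SEC("tp_btf/task_newtask")
int BPF_PROG(stash_task, struct task_struct *task, u64 clone_flags)
{
	struct map_value *v;
	struct task_struct *old;
	int key = 0;

	v = bpf_map_lookup_elem(&tasks, &key);
	if (!v)
		return 0;

	/* __kptr_untrusted: direct store of a PTR_TO_BTF_ID is fine */
	v->any_task = task;

	/* __kptr: only bpf_kptr_xchg() may move the reference */
	old = bpf_kptr_xchg(&v->owned_task, NULL);
	if (old)
		bpf_task_release(old);
	return 0;
}
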
Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: David Vernet Link: https://lore.kernel.org/bpf/20230303041446.3630-2-alexei.starovoitov@gmail.com --- Documentation/bpf/bpf_design_QA.rst | 4 ++-- Documentation/bpf/cpumasks.rst | 4 ++-- Documentation/bpf/kfuncs.rst | 2 +- kernel/bpf/btf.c | 4 ++-- tools/lib/bpf/bpf_helpers.h | 2 +- tools/testing/selftests/bpf/progs/cb_refs.c | 2 +- .../selftests/bpf/progs/cgrp_kfunc_common.h | 2 +- .../selftests/bpf/progs/cpumask_common.h | 2 +- .../selftests/bpf/progs/jit_probe_mem.c | 2 +- tools/testing/selftests/bpf/progs/lru_bug.c | 2 +- tools/testing/selftests/bpf/progs/map_kptr.c | 4 ++-- .../selftests/bpf/progs/map_kptr_fail.c | 6 ++--- .../selftests/bpf/progs/task_kfunc_common.h | 2 +- tools/testing/selftests/bpf/test_verifier.c | 22 +++++++++---------- 14 files changed, 30 insertions(+), 30 deletions(-) diff --git a/Documentation/bpf/bpf_design_QA.rst b/Documentation/bpf/bpf_design_QA.rst index bfff0e7e37c2f..38372a956d651 100644 --- a/Documentation/bpf/bpf_design_QA.rst +++ b/Documentation/bpf/bpf_design_QA.rst @@ -314,7 +314,7 @@ Q: What is the compatibility story for special BPF types in map values? Q: Users are allowed to embed bpf_spin_lock, bpf_timer fields in their BPF map values (when using BTF support for BPF maps). This allows to use helpers for such objects on these fields inside map values. Users are also allowed to embed -pointers to some kernel types (with __kptr and __kptr_ref BTF tags). Will the +pointers to some kernel types (with __kptr_untrusted and __kptr BTF tags). Will the kernel preserve backwards compatibility for these features? A: It depends. For bpf_spin_lock, bpf_timer: YES, for kptr and everything else: @@ -324,7 +324,7 @@ For struct types that have been added already, like bpf_spin_lock and bpf_timer, the kernel will preserve backwards compatibility, as they are part of UAPI. For kptrs, they are also part of UAPI, but only with respect to the kptr -mechanism. The types that you can use with a __kptr and __kptr_ref tagged +mechanism. The types that you can use with a __kptr_untrusted and __kptr tagged pointer in your struct are NOT part of the UAPI contract. The supported types can and will change across kernel releases. However, operations like accessing kptr fields and bpf_kptr_xchg() helper will continue to be supported across kernel diff --git a/Documentation/bpf/cpumasks.rst b/Documentation/bpf/cpumasks.rst index 24bef9cbbeeea..75344cd230e55 100644 --- a/Documentation/bpf/cpumasks.rst +++ b/Documentation/bpf/cpumasks.rst @@ -51,7 +51,7 @@ For example: .. code-block:: c struct cpumask_map_value { - struct bpf_cpumask __kptr_ref * cpumask; + struct bpf_cpumask __kptr * cpumask; }; struct array_map { @@ -128,7 +128,7 @@ Here is an example of a ``struct bpf_cpumask *`` being retrieved from a map: /* struct containing the struct bpf_cpumask kptr which is stored in the map. */ struct cpumasks_kfunc_map_value { - struct bpf_cpumask __kptr_ref * bpf_cpumask; + struct bpf_cpumask __kptr * bpf_cpumask; }; /* The map containing struct cpumasks_kfunc_map_value entries. */ diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst index 9d85bbc3b7713..b5d9b0d446bce 100644 --- a/Documentation/bpf/kfuncs.rst +++ b/Documentation/bpf/kfuncs.rst @@ -544,7 +544,7 @@ Here's an example of how it can be used: /* struct containing the struct task_struct kptr which is actually stored in the map. 
*/ struct __cgroups_kfunc_map_value { - struct cgroup __kptr_ref * cgroup; + struct cgroup __kptr * cgroup; }; /* The map containing struct __cgroups_kfunc_map_value entries. */ diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index ef2d8969ed1f7..c5e1d6955491a 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3288,9 +3288,9 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t, /* Reject extra tags */ if (btf_type_is_type_tag(btf_type_by_id(btf, t->type))) return -EINVAL; - if (!strcmp("kptr", __btf_name_by_offset(btf, t->name_off))) + if (!strcmp("kptr_untrusted", __btf_name_by_offset(btf, t->name_off))) type = BPF_KPTR_UNREF; - else if (!strcmp("kptr_ref", __btf_name_by_offset(btf, t->name_off))) + else if (!strcmp("kptr", __btf_name_by_offset(btf, t->name_off))) type = BPF_KPTR_REF; else return -EINVAL; diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 5ec1871acb2fc..7d12d3e620cc8 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -174,8 +174,8 @@ enum libbpf_tristate { #define __kconfig __attribute__((section(".kconfig"))) #define __ksym __attribute__((section(".ksyms"))) +#define __kptr_untrusted __attribute__((btf_type_tag("kptr_untrusted"))) #define __kptr __attribute__((btf_type_tag("kptr"))) -#define __kptr_ref __attribute__((btf_type_tag("kptr_ref"))) #ifndef ___bpf_concat #define ___bpf_concat(a, b) a ## b diff --git a/tools/testing/selftests/bpf/progs/cb_refs.c b/tools/testing/selftests/bpf/progs/cb_refs.c index 7653df1bc7871..ce96b33e38d66 100644 --- a/tools/testing/selftests/bpf/progs/cb_refs.c +++ b/tools/testing/selftests/bpf/progs/cb_refs.c @@ -4,7 +4,7 @@ #include struct map_value { - struct prog_test_ref_kfunc __kptr_ref *ptr; + struct prog_test_ref_kfunc __kptr *ptr; }; struct { diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h index 2f8de933b957d..d0b7cd0d09d77 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h @@ -10,7 +10,7 @@ #include struct __cgrps_kfunc_map_value { - struct cgroup __kptr_ref * cgrp; + struct cgroup __kptr * cgrp; }; struct hash_map { diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h index ad34f3b602bee..65e5496ca1b22 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_common.h +++ b/tools/testing/selftests/bpf/progs/cpumask_common.h @@ -10,7 +10,7 @@ int err; struct __cpumask_map_value { - struct bpf_cpumask __kptr_ref * cpumask; + struct bpf_cpumask __kptr * cpumask; }; struct array_map { diff --git a/tools/testing/selftests/bpf/progs/jit_probe_mem.c b/tools/testing/selftests/bpf/progs/jit_probe_mem.c index 2d2e61470794b..13f00ca2ed0a0 100644 --- a/tools/testing/selftests/bpf/progs/jit_probe_mem.c +++ b/tools/testing/selftests/bpf/progs/jit_probe_mem.c @@ -4,7 +4,7 @@ #include #include -static struct prog_test_ref_kfunc __kptr_ref *v; +static struct prog_test_ref_kfunc __kptr *v; long total_sum = -1; extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; diff --git a/tools/testing/selftests/bpf/progs/lru_bug.c b/tools/testing/selftests/bpf/progs/lru_bug.c index 687081a724b33..ad73029cb1e30 100644 --- a/tools/testing/selftests/bpf/progs/lru_bug.c +++ b/tools/testing/selftests/bpf/progs/lru_bug.c @@ -4,7 +4,7 @@ #include struct map_value { - struct task_struct __kptr *ptr; + struct task_struct __kptr_untrusted *ptr; 
}; struct { diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c index a24d17bc17eb7..3fe7cde4cbfd3 100644 --- a/tools/testing/selftests/bpf/progs/map_kptr.c +++ b/tools/testing/selftests/bpf/progs/map_kptr.c @@ -4,8 +4,8 @@ #include struct map_value { - struct prog_test_ref_kfunc __kptr *unref_ptr; - struct prog_test_ref_kfunc __kptr_ref *ref_ptr; + struct prog_test_ref_kfunc __kptr_untrusted *unref_ptr; + struct prog_test_ref_kfunc __kptr *ref_ptr; }; struct array_map { diff --git a/tools/testing/selftests/bpf/progs/map_kptr_fail.c b/tools/testing/selftests/bpf/progs/map_kptr_fail.c index 760e41e1a6326..e19e2a5f38cfb 100644 --- a/tools/testing/selftests/bpf/progs/map_kptr_fail.c +++ b/tools/testing/selftests/bpf/progs/map_kptr_fail.c @@ -7,9 +7,9 @@ struct map_value { char buf[8]; - struct prog_test_ref_kfunc __kptr *unref_ptr; - struct prog_test_ref_kfunc __kptr_ref *ref_ptr; - struct prog_test_member __kptr_ref *ref_memb_ptr; + struct prog_test_ref_kfunc __kptr_untrusted *unref_ptr; + struct prog_test_ref_kfunc __kptr *ref_ptr; + struct prog_test_member __kptr *ref_memb_ptr; }; struct array_map { diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_common.h b/tools/testing/selftests/bpf/progs/task_kfunc_common.h index c0ffd171743ed..4c2a4b0e3a251 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_common.h +++ b/tools/testing/selftests/bpf/progs/task_kfunc_common.h @@ -10,7 +10,7 @@ #include struct __tasks_kfunc_map_value { - struct task_struct __kptr_ref * task; + struct task_struct __kptr * task; }; struct hash_map { diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 8b9949bb833d7..49a70d9beb0bc 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -699,13 +699,13 @@ static int create_cgroup_storage(bool percpu) * struct bpf_timer t; * }; * struct btf_ptr { + * struct prog_test_ref_kfunc __kptr_untrusted *ptr; * struct prog_test_ref_kfunc __kptr *ptr; - * struct prog_test_ref_kfunc __kptr_ref *ptr; - * struct prog_test_member __kptr_ref *ptr; + * struct prog_test_member __kptr *ptr; * } */ static const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l\0bpf_timer\0timer\0t" - "\0btf_ptr\0prog_test_ref_kfunc\0ptr\0kptr\0kptr_ref" + "\0btf_ptr\0prog_test_ref_kfunc\0ptr\0kptr\0kptr_untrusted" "\0prog_test_member"; static __u32 btf_raw_types[] = { /* int */ @@ -724,20 +724,20 @@ static __u32 btf_raw_types[] = { BTF_MEMBER_ENC(41, 4, 0), /* struct bpf_timer t; */ /* struct prog_test_ref_kfunc */ /* [6] */ BTF_STRUCT_ENC(51, 0, 0), - BTF_STRUCT_ENC(89, 0, 0), /* [7] */ + BTF_STRUCT_ENC(95, 0, 0), /* [7] */ + /* type tag "kptr_untrusted" */ + BTF_TYPE_TAG_ENC(80, 6), /* [8] */ /* type tag "kptr" */ - BTF_TYPE_TAG_ENC(75, 6), /* [8] */ - /* type tag "kptr_ref" */ - BTF_TYPE_TAG_ENC(80, 6), /* [9] */ - BTF_TYPE_TAG_ENC(80, 7), /* [10] */ + BTF_TYPE_TAG_ENC(75, 6), /* [9] */ + BTF_TYPE_TAG_ENC(75, 7), /* [10] */ BTF_PTR_ENC(8), /* [11] */ BTF_PTR_ENC(9), /* [12] */ BTF_PTR_ENC(10), /* [13] */ /* struct btf_ptr */ /* [14] */ BTF_STRUCT_ENC(43, 3, 24), - BTF_MEMBER_ENC(71, 11, 0), /* struct prog_test_ref_kfunc __kptr *ptr; */ - BTF_MEMBER_ENC(71, 12, 64), /* struct prog_test_ref_kfunc __kptr_ref *ptr; */ - BTF_MEMBER_ENC(71, 13, 128), /* struct prog_test_member __kptr_ref *ptr; */ + BTF_MEMBER_ENC(71, 11, 0), /* struct prog_test_ref_kfunc __kptr_untrusted *ptr; */ + BTF_MEMBER_ENC(71, 12, 64), /* struct 
prog_test_ref_kfunc __kptr *ptr; */ + BTF_MEMBER_ENC(71, 13, 128), /* struct prog_test_member __kptr *ptr; */ }; static char bpf_vlog[UINT_MAX >> 8]; -- GitLab From 8d093b4e95a2a16a2cfcd36869b348a17112fabe Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 2 Mar 2023 20:14:42 -0800 Subject: [PATCH 0116/2078] bpf: Mark cgroups and dfl_cgrp fields as trusted. bpf programs sometimes do: bpf_cgrp_storage_get(&map, task->cgroups->dfl_cgrp, ...); It is safe to do, because cgroups->dfl_cgrp pointer is set diring init and never changes. The task->cgroups is also never NULL. It is also set during init and will change when task switches cgroups. For any trusted task pointer dereference of cgroups and dfl_cgrp should yield trusted pointers. The verifier wasn't aware of this. Hence in gcc compiled kernels task->cgroups dereference was producing PTR_TO_BTF_ID without modifiers while in clang compiled kernels the verifier recognizes __rcu tag in cgroups field and produces PTR_TO_BTF_ID | MEM_RCU | MAYBE_NULL. Tag cgroups and dfl_cgrp as trusted to equalize clang and gcc behavior. When GCC supports btf_type_tag such tagging will done directly in the type. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: David Vernet Acked-by: Tejun Heo Link: https://lore.kernel.org/bpf/20230303041446.3630-3-alexei.starovoitov@gmail.com --- kernel/bpf/verifier.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index bf580f246a013..b834f3d2d81a5 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5046,6 +5046,11 @@ static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val) BTF_TYPE_SAFE_NESTED(struct task_struct) { const cpumask_t *cpus_ptr; + struct css_set __rcu *cgroups; +}; + +BTF_TYPE_SAFE_NESTED(struct css_set) { + struct cgroup *dfl_cgrp; }; static bool nested_ptr_is_trusted(struct bpf_verifier_env *env, @@ -5057,6 +5062,7 @@ static bool nested_ptr_is_trusted(struct bpf_verifier_env *env, return false; BTF_TYPE_EMIT(BTF_TYPE_SAFE_NESTED(struct task_struct)); + BTF_TYPE_EMIT(BTF_TYPE_SAFE_NESTED(struct css_set)); return btf_nested_type_is_trusted(&env->log, reg, off); } -- GitLab From 20c09d92faeefb8536f705d3a4629e0dc314c8a1 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 2 Mar 2023 20:14:43 -0800 Subject: [PATCH 0117/2078] bpf: Introduce kptr_rcu. The life time of certain kernel structures like 'struct cgroup' is protected by RCU. Hence it's safe to dereference them directly from __kptr tagged pointers in bpf maps. The resulting pointer is MEM_RCU and can be passed to kfuncs that expect KF_RCU. Derefrence of other kptr-s returns PTR_UNTRUSTED. For example: struct map_value { struct cgroup __kptr *cgrp; }; SEC("tp_btf/cgroup_mkdir") int BPF_PROG(test_cgrp_get_ancestors, struct cgroup *cgrp_arg, const char *path) { struct cgroup *cg, *cg2; cg = bpf_cgroup_acquire(cgrp_arg); // cg is PTR_TRUSTED and ref_obj_id > 0 bpf_kptr_xchg(&v->cgrp, cg); cg2 = v->cgrp; // This is new feature introduced by this patch. // cg2 is PTR_MAYBE_NULL | MEM_RCU. // When cg2 != NULL, it's a valid cgroup, but its percpu_ref could be zero if (cg2) bpf_cgroup_ancestor(cg2, level); // safe to do. 
} Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Tejun Heo Acked-by: David Vernet Link: https://lore.kernel.org/bpf/20230303041446.3630-4-alexei.starovoitov@gmail.com --- Documentation/bpf/kfuncs.rst | 12 ++-- include/linux/btf.h | 2 +- kernel/bpf/helpers.c | 6 +- kernel/bpf/verifier.c | 55 ++++++++++++++++--- net/bpf/test_run.c | 3 +- .../selftests/bpf/progs/cgrp_kfunc_failure.c | 2 +- .../selftests/bpf/progs/map_kptr_fail.c | 4 +- tools/testing/selftests/bpf/verifier/calls.c | 2 +- .../testing/selftests/bpf/verifier/map_kptr.c | 2 +- 9 files changed, 65 insertions(+), 23 deletions(-) diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst index b5d9b0d446bce..69eccf6f98ef4 100644 --- a/Documentation/bpf/kfuncs.rst +++ b/Documentation/bpf/kfuncs.rst @@ -249,11 +249,13 @@ added later. 2.4.8 KF_RCU flag ----------------- -The KF_RCU flag is used for kfuncs which have a rcu ptr as its argument. -When used together with KF_ACQUIRE, it indicates the kfunc should have a -single argument which must be a trusted argument or a MEM_RCU pointer. -The argument may have reference count of 0 and the kfunc must take this -into consideration. +The KF_RCU flag is a weaker version of KF_TRUSTED_ARGS. The kfuncs marked with +KF_RCU expect either PTR_TRUSTED or MEM_RCU arguments. The verifier guarantees +that the objects are valid and there is no use-after-free. The pointers are not +NULL, but the object's refcount could have reached zero. The kfuncs need to +consider doing refcnt != 0 check, especially when returning a KF_ACQUIRE +pointer. Note as well that a KF_ACQUIRE kfunc that is KF_RCU should very likely +also be KF_RET_NULL. .. _KF_deprecated_flag: diff --git a/include/linux/btf.h b/include/linux/btf.h index 49e0fe6d8274b..556b3e2e7471e 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -70,7 +70,7 @@ #define KF_TRUSTED_ARGS (1 << 4) /* kfunc only takes trusted pointer arguments */ #define KF_SLEEPABLE (1 << 5) /* kfunc may sleep */ #define KF_DESTRUCTIVE (1 << 6) /* kfunc performs destructive actions */ -#define KF_RCU (1 << 7) /* kfunc only takes rcu pointer arguments */ +#define KF_RCU (1 << 7) /* kfunc takes either rcu or trusted pointer arguments */ /* * Tag marking a kernel function as a kfunc. 
This is meant to minimize the diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 12f12e879bcf0..637ac4e92e756 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2163,8 +2163,10 @@ __bpf_kfunc struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) if (level > cgrp->level || level < 0) return NULL; + /* cgrp's refcnt could be 0 here, but ancestors can still be accessed */ ancestor = cgrp->ancestors[level]; - cgroup_get(ancestor); + if (!cgroup_tryget(ancestor)) + return NULL; return ancestor; } @@ -2382,7 +2384,7 @@ BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL) BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_cgroup_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_cgroup_release, KF_RELEASE) -BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_TRUSTED_ARGS | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_RCU | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_cgroup_from_id, KF_ACQUIRE | KF_RET_NULL) #endif BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b834f3d2d81a5..a095055d7ef4f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4218,7 +4218,7 @@ static int map_kptr_match_type(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno) { const char *targ_name = kernel_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id); - int perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED; + int perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED | MEM_RCU; const char *reg_name = ""; /* Only unreferenced case accepts untrusted pointers */ @@ -4285,6 +4285,34 @@ bad_type: return -EINVAL; } +/* The non-sleepable programs and sleepable programs with explicit bpf_rcu_read_lock() + * can dereference RCU protected pointers and result is PTR_TRUSTED. + */ +static bool in_rcu_cs(struct bpf_verifier_env *env) +{ + return env->cur_state->active_rcu_lock || !env->prog->aux->sleepable; +} + +/* Once GCC supports btf_type_tag the following mechanism will be replaced with tag check */ +BTF_SET_START(rcu_protected_types) +BTF_ID(struct, prog_test_ref_kfunc) +BTF_ID(struct, cgroup) +BTF_SET_END(rcu_protected_types) + +static bool rcu_protected_object(const struct btf *btf, u32 btf_id) +{ + if (!btf_is_kernel(btf)) + return false; + return btf_id_set_contains(&rcu_protected_types, btf_id); +} + +static bool rcu_safe_kptr(const struct btf_field *field) +{ + const struct btf_field_kptr *kptr = &field->kptr; + + return field->type == BPF_KPTR_REF && rcu_protected_object(kptr->btf, kptr->btf_id); +} + static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno, int value_regno, int insn_idx, struct btf_field *kptr_field) @@ -4319,7 +4347,10 @@ static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno, * value from map as PTR_TO_BTF_ID, with the correct type. */ mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID, kptr_field->kptr.btf, - kptr_field->kptr.btf_id, PTR_MAYBE_NULL | PTR_UNTRUSTED); + kptr_field->kptr.btf_id, + rcu_safe_kptr(kptr_field) && in_rcu_cs(env) ? + PTR_MAYBE_NULL | MEM_RCU : + PTR_MAYBE_NULL | PTR_UNTRUSTED); /* For mark_ptr_or_null_reg */ val_reg->id = ++env->id_gen; } else if (class == BPF_STX) { @@ -5163,10 +5194,17 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, * An RCU-protected pointer can also be deemed trusted if we are in an * RCU read region. This case is handled below. 
*/ - if (nested_ptr_is_trusted(env, reg, off)) + if (nested_ptr_is_trusted(env, reg, off)) { flag |= PTR_TRUSTED; - else + /* + * task->cgroups is trusted. It provides a stronger guarantee + * than __rcu tag on 'cgroups' field in 'struct task_struct'. + * Clear MEM_RCU in such case. + */ + flag &= ~MEM_RCU; + } else { flag &= ~PTR_TRUSTED; + } if (flag & MEM_RCU) { /* Mark value register as MEM_RCU only if it is protected by @@ -5175,11 +5213,10 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, * read lock region. Also mark rcu pointer as PTR_MAYBE_NULL since * it could be null in some cases. */ - if (!env->cur_state->active_rcu_lock || - !(is_trusted_reg(reg) || is_rcu_reg(reg))) - flag &= ~MEM_RCU; - else + if (in_rcu_cs(env) && (is_trusted_reg(reg) || is_rcu_reg(reg))) flag |= PTR_MAYBE_NULL; + else + flag &= ~MEM_RCU; } else if (reg->type & MEM_RCU) { /* ptr (reg) is marked as MEM_RCU, but the struct field is not tagged * with __rcu. Mark the flag as PTR_UNTRUSTED conservatively. @@ -9676,7 +9713,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ return -EINVAL; } - if (is_kfunc_trusted_args(meta) && + if ((is_kfunc_trusted_args(meta) || is_kfunc_rcu(meta)) && (register_is_null(reg) || type_may_be_null(reg->type))) { verbose(env, "Possibly NULL pointer passed to trusted arg%d\n", i); return -EACCES; diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 6f3d654b3339c..6a8b33a103a40 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -737,6 +737,7 @@ __bpf_kfunc void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len) __bpf_kfunc void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) { + /* p != NULL, but p->cnt could be 0 */ } __bpf_kfunc void bpf_kfunc_call_test_destructive(void) @@ -784,7 +785,7 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail3) BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_pass1) BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1) BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU) BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE) BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg) BTF_SET8_END(test_sk_check_kfunc_ids) diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c index 4ad7fe24966de..b42291ed95869 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c @@ -205,7 +205,7 @@ int BPF_PROG(cgrp_kfunc_get_unreleased, struct cgroup *cgrp, const char *path) } SEC("tp_btf/cgroup_mkdir") -__failure __msg("arg#0 is untrusted_ptr_or_null_ expected ptr_ or socket") +__failure __msg("expects refcounted") int BPF_PROG(cgrp_kfunc_release_untrusted, struct cgroup *cgrp, const char *path) { struct __cgrps_kfunc_map_value *v; diff --git a/tools/testing/selftests/bpf/progs/map_kptr_fail.c b/tools/testing/selftests/bpf/progs/map_kptr_fail.c index e19e2a5f38cfb..08f9ec18c345a 100644 --- a/tools/testing/selftests/bpf/progs/map_kptr_fail.c +++ b/tools/testing/selftests/bpf/progs/map_kptr_fail.c @@ -281,7 +281,7 @@ int reject_kptr_get_bad_type_match(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_") +__failure __msg("R1 type=rcu_ptr_or_null_ expected=percpu_ptr_") int mark_ref_as_untrusted_or_null(struct __sk_buff *ctx) { struct map_value *v; @@ -316,7 
+316,7 @@ int reject_untrusted_store_to_ref(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("R2 type=untrusted_ptr_ expected=ptr_") +__failure __msg("R2 must be referenced") int reject_untrusted_xchg(struct __sk_buff *ctx) { struct prog_test_ref_kfunc *p; diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 289ed202ec66a..9a326a800e5cf 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -243,7 +243,7 @@ }, .result_unpriv = REJECT, .result = REJECT, - .errstr = "R1 must be referenced", + .errstr = "R1 must be", }, { "calls: valid kfunc call: referenced arg needs refcounted PTR_TO_BTF_ID", diff --git a/tools/testing/selftests/bpf/verifier/map_kptr.c b/tools/testing/selftests/bpf/verifier/map_kptr.c index 6914904344c09..d775ccb019892 100644 --- a/tools/testing/selftests/bpf/verifier/map_kptr.c +++ b/tools/testing/selftests/bpf/verifier/map_kptr.c @@ -336,7 +336,7 @@ .prog_type = BPF_PROG_TYPE_SCHED_CLS, .fixup_map_kptr = { 1 }, .result = REJECT, - .errstr = "R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_", + .errstr = "R1 type=rcu_ptr_or_null_ expected=percpu_ptr_", }, { "map_kptr: ref: reject off != 0", -- GitLab From 838bd4ac9aa35bdf43bf0199fa8eef9d3a004611 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 2 Mar 2023 20:14:44 -0800 Subject: [PATCH 0118/2078] selftests/bpf: Add a test case for kptr_rcu. Tweak existing map_kptr test to check kptr_rcu. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: David Vernet Link: https://lore.kernel.org/bpf/20230303041446.3630-5-alexei.starovoitov@gmail.com --- tools/testing/selftests/bpf/progs/map_kptr.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c index 3fe7cde4cbfd3..3903d30217b8f 100644 --- a/tools/testing/selftests/bpf/progs/map_kptr.c +++ b/tools/testing/selftests/bpf/progs/map_kptr.c @@ -118,6 +118,7 @@ extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp extern struct prog_test_ref_kfunc * bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **p, int a, int b) __ksym; extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; +void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) __ksym; #define WRITE_ONCE(x, val) ((*(volatile typeof(x) *) &(x)) = (val)) @@ -147,12 +148,23 @@ static void test_kptr_ref(struct map_value *v) WRITE_ONCE(v->unref_ptr, p); if (!p) return; + /* + * p is rcu_ptr_prog_test_ref_kfunc, + * because bpf prog is non-sleepable and runs in RCU CS. + * p can be passed to kfunc that requires KF_RCU. + */ + bpf_kfunc_call_test_ref(p); if (p->a + p->b > 100) return; /* store NULL */ p = bpf_kptr_xchg(&v->ref_ptr, NULL); if (!p) return; + /* + * p is trusted_ptr_prog_test_ref_kfunc. + * p can be passed to kfunc that requires KF_RCU. + */ + bpf_kfunc_call_test_ref(p); if (p->a + p->b > 100) { bpf_kfunc_call_test_release(p); return; -- GitLab From 0047d8343f6042c4feea24072ef254d47b8a33b3 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 2 Mar 2023 20:14:45 -0800 Subject: [PATCH 0119/2078] selftests/bpf: Tweak cgroup kfunc test. Adjust cgroup kfunc test to dereference RCU protected cgroup pointer as PTR_TRUSTED and pass into KF_TRUSTED_ARGS kfunc. 
Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: David Vernet Link: https://lore.kernel.org/bpf/20230303041446.3630-6-alexei.starovoitov@gmail.com --- .../testing/selftests/bpf/progs/cgrp_kfunc_success.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c index 42e13aebdd627..030aff7000844 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c @@ -61,7 +61,7 @@ int BPF_PROG(test_cgrp_acquire_leave_in_map, struct cgroup *cgrp, const char *pa SEC("tp_btf/cgroup_mkdir") int BPF_PROG(test_cgrp_xchg_release, struct cgroup *cgrp, const char *path) { - struct cgroup *kptr; + struct cgroup *kptr, *cg; struct __cgrps_kfunc_map_value *v; long status; @@ -80,6 +80,16 @@ int BPF_PROG(test_cgrp_xchg_release, struct cgroup *cgrp, const char *path) return 0; } + kptr = v->cgrp; + if (!kptr) { + err = 4; + return 0; + } + + cg = bpf_cgroup_ancestor(kptr, 1); + if (cg) /* verifier only check */ + bpf_cgroup_release(cg); + kptr = bpf_kptr_xchg(&v->cgrp, NULL); if (!kptr) { err = 3; -- GitLab From 6fcd486b3a0a628c41f12b3a7329a18a2c74b351 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 2 Mar 2023 20:14:46 -0800 Subject: [PATCH 0120/2078] bpf: Refactor RCU enforcement in the verifier. bpf_rcu_read_lock/unlock() are only available in clang compiled kernels. Lack of such key mechanism makes it impossible for sleepable bpf programs to use RCU pointers. Allow bpf_rcu_read_lock/unlock() in GCC compiled kernels (though GCC doesn't support btf_type_tag yet) and allowlist certain field dereferences in important data structures like tast_struct, cgroup, socket that are used by sleepable programs either as RCU pointer or full trusted pointer (which is valid outside of RCU CS). Use BTF_TYPE_SAFE_RCU and BTF_TYPE_SAFE_TRUSTED macros for such tagging. They will be removed once GCC supports btf_type_tag. With that refactor check_ptr_to_btf_access(). Make it strict in enforcing PTR_TRUSTED and PTR_UNTRUSTED while deprecating old PTR_TO_BTF_ID without modifier flags. There is a chance that this strict enforcement might break existing programs (especially on GCC compiled kernels), but this cleanup has to start sooner than later. Note PTR_TO_CTX access still yields old deprecated PTR_TO_BTF_ID. Once it's converted to strict PTR_TRUSTED or PTR_UNTRUSTED the kfuncs and helpers will be able to default to KF_TRUSTED_ARGS. KF_RCU will remain as a weaker version of KF_TRUSTED_ARGS where obj refcnt could be 0. Adjust rcu_read_lock selftest to run on gcc and clang compiled kernels. 
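As a rough sketch of what this allows on either toolchain, a sleepable program can take an explicit RCU section around the allowlisted field walks (hook choice and variable names are illustrative and assume CONFIG_BPF_LSM):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";

void bpf_rcu_read_lock(void) __ksym;
void bpf_rcu_read_unlock(void) __ksym;

__u64 dfl_cgrp_id;

SEC("lsm.s/bpf")
int BPF_PROG(read_dfl_cgrp, int cmd, union bpf_attr *attr, unsigned int size)
{
	struct task_struct *task = bpf_get_current_task_btf();

	/* In a sleepable program the allowlisted task->cgroups and
	 * css_set->dfl_cgrp walks are only trusted inside an explicit
	 * RCU read-side section.
	 */
	bpf_rcu_read_lock();
	dfl_cgrp_id = task->cgroups->dfl_cgrp->kn->id;
	bpf_rcu_read_unlock();

	return 0;
}
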
Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: David Vernet Link: https://lore.kernel.org/bpf/20230303041446.3630-7-alexei.starovoitov@gmail.com --- include/linux/bpf.h | 2 +- include/linux/bpf_verifier.h | 1 - kernel/bpf/btf.c | 16 +- kernel/bpf/cpumask.c | 40 ++-- kernel/bpf/verifier.c | 178 ++++++++++++------ .../bpf/prog_tests/cgrp_local_storage.c | 14 +- .../selftests/bpf/prog_tests/rcu_read_lock.c | 16 +- .../selftests/bpf/progs/cgrp_ls_sleepable.c | 4 +- .../selftests/bpf/progs/cpumask_failure.c | 2 +- .../bpf/progs/nested_trust_failure.c | 2 +- .../selftests/bpf/progs/rcu_read_lock.c | 6 +- tools/testing/selftests/bpf/verifier/calls.c | 2 +- 12 files changed, 173 insertions(+), 110 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 23ec684e660d5..d3456804f7aa6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2279,7 +2279,7 @@ struct bpf_core_ctx { bool btf_nested_type_is_trusted(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, - int off); + int off, const char *suffix); bool btf_type_ids_nocast_alias(struct bpf_verifier_log *log, const struct btf *reg_btf, u32 reg_id, diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index b26ff2a8f63bc..18538bad2b8c7 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -537,7 +537,6 @@ struct bpf_verifier_env { bool bypass_spec_v1; bool bypass_spec_v4; bool seen_direct_write; - bool rcu_tag_supported; struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ const struct bpf_line_info *prev_linfo; struct bpf_verifier_log log; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index c5e1d6955491a..a8cb09e5973b7 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -6163,6 +6163,7 @@ static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf, const char *tname, *mname, *tag_value; u32 vlen, elem_id, mid; + *flag = 0; again: tname = __btf_name_by_offset(btf, t->name_off); if (!btf_type_is_struct(t)) { @@ -6329,6 +6330,15 @@ error: * of this field or inside of this struct */ if (btf_type_is_struct(mtype)) { + if (BTF_INFO_KIND(mtype->info) == BTF_KIND_UNION && + btf_type_vlen(mtype) != 1) + /* + * walking unions yields untrusted pointers + * with exception of __bpf_md_ptr and other + * unions with a single member + */ + *flag |= PTR_UNTRUSTED; + /* our field must be inside that union or struct */ t = mtype; @@ -6373,7 +6383,7 @@ error: stype = btf_type_skip_modifiers(btf, mtype->type, &id); if (btf_type_is_struct(stype)) { *next_btf_id = id; - *flag = tmp_flag; + *flag |= tmp_flag; return WALK_PTR; } } @@ -8357,7 +8367,7 @@ out: bool btf_nested_type_is_trusted(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, - int off) + int off, const char *suffix) { struct btf *btf = reg->btf; const struct btf_type *walk_type, *safe_type; @@ -8374,7 +8384,7 @@ bool btf_nested_type_is_trusted(struct bpf_verifier_log *log, tname = btf_name_by_offset(btf, walk_type->name_off); - ret = snprintf(safe_tname, sizeof(safe_tname), "%s__safe_fields", tname); + ret = snprintf(safe_tname, sizeof(safe_tname), "%s%s", tname, suffix); if (ret < 0) return false; diff --git a/kernel/bpf/cpumask.c b/kernel/bpf/cpumask.c index 2b3fbbfebdc5f..b6587ec40f1b8 100644 --- a/kernel/bpf/cpumask.c +++ b/kernel/bpf/cpumask.c @@ -427,26 +427,26 @@ BTF_ID_FLAGS(func, bpf_cpumask_create, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_cpumask_release, KF_RELEASE | KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_cpumask_acquire, 
KF_ACQUIRE | KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_cpumask_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL) -BTF_ID_FLAGS(func, bpf_cpumask_first, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_first_zero, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_set_cpu, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_clear_cpu, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_test_cpu, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_test_and_set_cpu, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_test_and_clear_cpu, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_setall, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_clear, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_and, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_or, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_xor, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_equal, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_intersects, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_subset, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_empty, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_full, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_copy, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_any, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_any_and, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_cpumask_first, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_first_zero, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_set_cpu, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_clear_cpu, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_test_cpu, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_test_and_set_cpu, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_test_and_clear_cpu, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_setall, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_clear, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_and, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_or, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_xor, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_equal, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_intersects, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_subset, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_empty, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_full, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_copy, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_any, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_any_and, KF_RCU) BTF_SET8_END(cpumask_kfunc_btf_ids) static const struct btf_kfunc_id_set cpumask_kfunc_set = { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a095055d7ef4f..c2adf3c24c64a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5073,29 +5073,76 @@ static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val) return 0; } -#define BTF_TYPE_SAFE_NESTED(__type) __PASTE(__type, __safe_fields) +#define BTF_TYPE_SAFE_RCU(__type) __PASTE(__type, __safe_rcu) +#define BTF_TYPE_SAFE_TRUSTED(__type) __PASTE(__type, __safe_trusted) -BTF_TYPE_SAFE_NESTED(struct task_struct) { +/* + * Allow list few fields as RCU trusted or full trusted. + * This logic doesn't allow mix tagging and will be removed once GCC supports + * btf_type_tag. 
+ */ + +/* RCU trusted: these fields are trusted in RCU CS and never NULL */ +BTF_TYPE_SAFE_RCU(struct task_struct) { const cpumask_t *cpus_ptr; struct css_set __rcu *cgroups; + struct task_struct __rcu *real_parent; + struct task_struct *group_leader; }; -BTF_TYPE_SAFE_NESTED(struct css_set) { +BTF_TYPE_SAFE_RCU(struct css_set) { struct cgroup *dfl_cgrp; }; -static bool nested_ptr_is_trusted(struct bpf_verifier_env *env, - struct bpf_reg_state *reg, - int off) +/* full trusted: these fields are trusted even outside of RCU CS and never NULL */ +BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta) { + __bpf_md_ptr(struct seq_file *, seq); +}; + +BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task) { + __bpf_md_ptr(struct bpf_iter_meta *, meta); + __bpf_md_ptr(struct task_struct *, task); +}; + +BTF_TYPE_SAFE_TRUSTED(struct linux_binprm) { + struct file *file; +}; + +BTF_TYPE_SAFE_TRUSTED(struct file) { + struct inode *f_inode; +}; + +BTF_TYPE_SAFE_TRUSTED(struct dentry) { + /* no negative dentry-s in places where bpf can see it */ + struct inode *d_inode; +}; + +BTF_TYPE_SAFE_TRUSTED(struct socket) { + struct sock *sk; +}; + +static bool type_is_rcu(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, + int off) { - /* If its parent is not trusted, it can't regain its trusted status. */ - if (!is_trusted_reg(reg)) - return false; + BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct task_struct)); + BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct css_set)); - BTF_TYPE_EMIT(BTF_TYPE_SAFE_NESTED(struct task_struct)); - BTF_TYPE_EMIT(BTF_TYPE_SAFE_NESTED(struct css_set)); + return btf_nested_type_is_trusted(&env->log, reg, off, "__safe_rcu"); +} - return btf_nested_type_is_trusted(&env->log, reg, off); +static bool type_is_trusted(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, + int off) +{ + BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta)); + BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task)); + BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct linux_binprm)); + BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct file)); + BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct dentry)); + BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct socket)); + + return btf_nested_type_is_trusted(&env->log, reg, off, "__safe_trusted"); } static int check_ptr_to_btf_access(struct bpf_verifier_env *env, @@ -5181,49 +5228,58 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, if (ret < 0) return ret; - /* If this is an untrusted pointer, all pointers formed by walking it - * also inherit the untrusted flag. - */ - if (type_flag(reg->type) & PTR_UNTRUSTED) - flag |= PTR_UNTRUSTED; + if (ret != PTR_TO_BTF_ID) { + /* just mark; */ - /* By default any pointer obtained from walking a trusted pointer is no - * longer trusted, unless the field being accessed has explicitly been - * marked as inheriting its parent's state of trust. - * - * An RCU-protected pointer can also be deemed trusted if we are in an - * RCU read region. This case is handled below. - */ - if (nested_ptr_is_trusted(env, reg, off)) { - flag |= PTR_TRUSTED; - /* - * task->cgroups is trusted. It provides a stronger guarantee - * than __rcu tag on 'cgroups' field in 'struct task_struct'. - * Clear MEM_RCU in such case. + } else if (type_flag(reg->type) & PTR_UNTRUSTED) { + /* If this is an untrusted pointer, all pointers formed by walking it + * also inherit the untrusted flag. 
+ */ + flag = PTR_UNTRUSTED; + + } else if (is_trusted_reg(reg) || is_rcu_reg(reg)) { + /* By default any pointer obtained from walking a trusted pointer is no + * longer trusted, unless the field being accessed has explicitly been + * marked as inheriting its parent's state of trust (either full or RCU). + * For example: + * 'cgroups' pointer is untrusted if task->cgroups dereference + * happened in a sleepable program outside of bpf_rcu_read_lock() + * section. In a non-sleepable program it's trusted while in RCU CS (aka MEM_RCU). + * Note bpf_rcu_read_unlock() converts MEM_RCU pointers to PTR_UNTRUSTED. + * + * A regular RCU-protected pointer with __rcu tag can also be deemed + * trusted if we are in an RCU CS. Such pointer can be NULL. */ - flag &= ~MEM_RCU; + if (type_is_trusted(env, reg, off)) { + flag |= PTR_TRUSTED; + } else if (in_rcu_cs(env) && !type_may_be_null(reg->type)) { + if (type_is_rcu(env, reg, off)) { + /* ignore __rcu tag and mark it MEM_RCU */ + flag |= MEM_RCU; + } else if (flag & MEM_RCU) { + /* __rcu tagged pointers can be NULL */ + flag |= PTR_MAYBE_NULL; + } else if (flag & (MEM_PERCPU | MEM_USER)) { + /* keep as-is */ + } else { + /* walking unknown pointers yields untrusted pointer */ + flag = PTR_UNTRUSTED; + } + } else { + /* + * If not in RCU CS or MEM_RCU pointer can be NULL then + * aggressively mark as untrusted otherwise such + * pointers will be plain PTR_TO_BTF_ID without flags + * and will be allowed to be passed into helpers for + * compat reasons. + */ + flag = PTR_UNTRUSTED; + } } else { + /* Old compat. Deprecated */ flag &= ~PTR_TRUSTED; } - if (flag & MEM_RCU) { - /* Mark value register as MEM_RCU only if it is protected by - * bpf_rcu_read_lock() and the ptr reg is rcu or trusted. MEM_RCU - * itself can already indicate trustedness inside the rcu - * read lock region. Also mark rcu pointer as PTR_MAYBE_NULL since - * it could be null in some cases. - */ - if (in_rcu_cs(env) && (is_trusted_reg(reg) || is_rcu_reg(reg))) - flag |= PTR_MAYBE_NULL; - else - flag &= ~MEM_RCU; - } else if (reg->type & MEM_RCU) { - /* ptr (reg) is marked as MEM_RCU, but the struct field is not tagged - * with __rcu. Mark the flag as PTR_UNTRUSTED conservatively. - */ - flag |= PTR_UNTRUSTED; - } - if (atype == BPF_READ && value_regno >= 0) mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag); @@ -10049,10 +10105,6 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta); rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta); - if ((rcu_lock || rcu_unlock) && !env->rcu_tag_supported) { - verbose(env, "no vmlinux btf rcu tag support for kfunc %s\n", func_name); - return -EACCES; - } if (env->cur_state->active_rcu_lock) { struct bpf_func_state *state; @@ -14911,8 +14963,22 @@ static int do_check(struct bpf_verifier_env *env) * src_reg == stack|map in some other branch. * Reject it. */ - verbose(env, "same insn cannot be used with different pointers\n"); - return -EINVAL; + if (base_type(src_reg_type) == PTR_TO_BTF_ID && + base_type(*prev_src_type) == PTR_TO_BTF_ID) { + /* + * Have to support a use case when one path through + * the program yields TRUSTED pointer while another + * is UNTRUSTED. Fallback to UNTRUSTED to generate + * BPF_PROBE_MEM. 
+ */ + *prev_src_type = PTR_TO_BTF_ID | PTR_UNTRUSTED; + } else { + verbose(env, + "The same insn cannot be used with different pointers: %s", + reg_type_str(env, src_reg_type)); + verbose(env, " != %s\n", reg_type_str(env, *prev_src_type)); + return -EINVAL; + } } } else if (class == BPF_STX) { @@ -17984,8 +18050,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr) env->bypass_spec_v1 = bpf_bypass_spec_v1(); env->bypass_spec_v4 = bpf_bypass_spec_v4(); env->bpf_capable = bpf_capable(); - env->rcu_tag_supported = btf_vmlinux && - btf_find_by_name_kind(btf_vmlinux, "rcu", BTF_KIND_TYPE_TAG) > 0; if (is_priv) env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ; diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c index 2cc759956e3b4..63e776f4176eb 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c @@ -193,7 +193,7 @@ out: cgrp_ls_sleepable__destroy(skel); } -static void test_no_rcu_lock(__u64 cgroup_id) +static void test_yes_rcu_lock(__u64 cgroup_id) { struct cgrp_ls_sleepable *skel; int err; @@ -204,7 +204,7 @@ static void test_no_rcu_lock(__u64 cgroup_id) skel->bss->target_pid = syscall(SYS_gettid); - bpf_program__set_autoload(skel->progs.no_rcu_lock, true); + bpf_program__set_autoload(skel->progs.yes_rcu_lock, true); err = cgrp_ls_sleepable__load(skel); if (!ASSERT_OK(err, "skel_load")) goto out; @@ -220,7 +220,7 @@ out: cgrp_ls_sleepable__destroy(skel); } -static void test_rcu_lock(void) +static void test_no_rcu_lock(void) { struct cgrp_ls_sleepable *skel; int err; @@ -229,7 +229,7 @@ static void test_rcu_lock(void) if (!ASSERT_OK_PTR(skel, "skel_open")) return; - bpf_program__set_autoload(skel->progs.yes_rcu_lock, true); + bpf_program__set_autoload(skel->progs.no_rcu_lock, true); err = cgrp_ls_sleepable__load(skel); ASSERT_ERR(err, "skel_load"); @@ -256,10 +256,10 @@ void test_cgrp_local_storage(void) test_negative(); if (test__start_subtest("cgroup_iter_sleepable")) test_cgroup_iter_sleepable(cgroup_fd, cgroup_id); + if (test__start_subtest("yes_rcu_lock")) + test_yes_rcu_lock(cgroup_id); if (test__start_subtest("no_rcu_lock")) - test_no_rcu_lock(cgroup_id); - if (test__start_subtest("rcu_lock")) - test_rcu_lock(); + test_no_rcu_lock(); close(cgroup_fd); } diff --git a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c index 447d8560ecb63..3f1f58d3a729c 100644 --- a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c @@ -25,10 +25,10 @@ static void test_success(void) bpf_program__set_autoload(skel->progs.get_cgroup_id, true); bpf_program__set_autoload(skel->progs.task_succ, true); - bpf_program__set_autoload(skel->progs.no_lock, true); bpf_program__set_autoload(skel->progs.two_regions, true); bpf_program__set_autoload(skel->progs.non_sleepable_1, true); bpf_program__set_autoload(skel->progs.non_sleepable_2, true); + bpf_program__set_autoload(skel->progs.task_trusted_non_rcuptr, true); err = rcu_read_lock__load(skel); if (!ASSERT_OK(err, "skel_load")) goto out; @@ -69,6 +69,7 @@ out: static const char * const inproper_region_tests[] = { "miss_lock", + "no_lock", "miss_unlock", "non_sleepable_rcu_mismatch", "inproper_sleepable_helper", @@ -99,7 +100,6 @@ out: } static const char * const rcuptr_misuse_tests[] = { - "task_untrusted_non_rcuptr", 
"task_untrusted_rcuptr", "cross_rcu_region", }; @@ -128,17 +128,8 @@ out: void test_rcu_read_lock(void) { - struct btf *vmlinux_btf; int cgroup_fd; - vmlinux_btf = btf__load_vmlinux_btf(); - if (!ASSERT_OK_PTR(vmlinux_btf, "could not load vmlinux BTF")) - return; - if (btf__find_by_name_kind(vmlinux_btf, "rcu", BTF_KIND_TYPE_TAG) < 0) { - test__skip(); - goto out; - } - cgroup_fd = test__join_cgroup("/rcu_read_lock"); if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /rcu_read_lock")) goto out; @@ -153,6 +144,5 @@ void test_rcu_read_lock(void) if (test__start_subtest("negative_tests_rcuptr_misuse")) test_rcuptr_misuse(); close(cgroup_fd); -out: - btf__free(vmlinux_btf); +out:; } diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c index 2d11ed528b6f8..7615dc23d301e 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c @@ -49,7 +49,7 @@ int no_rcu_lock(void *ctx) if (task->pid != target_pid) return 0; - /* ptr_to_btf_id semantics. should work. */ + /* task->cgroups is untrusted in sleepable prog outside of RCU CS */ cgrp = task->cgroups->dfl_cgrp; ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); @@ -71,7 +71,7 @@ int yes_rcu_lock(void *ctx) bpf_rcu_read_lock(); cgrp = task->cgroups->dfl_cgrp; - /* cgrp is untrusted and cannot pass to bpf_cgrp_storage_get() helper. */ + /* cgrp is trusted under RCU CS */ ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (ptr) cgroup_id = cgrp->kn->id; diff --git a/tools/testing/selftests/bpf/progs/cpumask_failure.c b/tools/testing/selftests/bpf/progs/cpumask_failure.c index 33e8e86dd090e..c16f7563b84ec 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_failure.c +++ b/tools/testing/selftests/bpf/progs/cpumask_failure.c @@ -44,7 +44,7 @@ int BPF_PROG(test_alloc_double_release, struct task_struct *task, u64 clone_flag } SEC("tp_btf/task_newtask") -__failure __msg("bpf_cpumask_acquire args#0 expected pointer to STRUCT bpf_cpumask") +__failure __msg("must be referenced") int BPF_PROG(test_acquire_wrong_cpumask, struct task_struct *task, u64 clone_flags) { struct bpf_cpumask *cpumask; diff --git a/tools/testing/selftests/bpf/progs/nested_trust_failure.c b/tools/testing/selftests/bpf/progs/nested_trust_failure.c index 14aff76764367..0d1aa6bbace4b 100644 --- a/tools/testing/selftests/bpf/progs/nested_trust_failure.c +++ b/tools/testing/selftests/bpf/progs/nested_trust_failure.c @@ -17,7 +17,7 @@ char _license[] SEC("license") = "GPL"; */ SEC("tp_btf/task_newtask") -__failure __msg("R2 must be referenced or trusted") +__failure __msg("R2 must be") int BPF_PROG(test_invalid_nested_user_cpus, struct task_struct *task, u64 clone_flags) { bpf_cpumask_test_cpu(0, task->user_cpus_ptr); diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c index 5cecbdbbb16ee..7250bb76d18ad 100644 --- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c +++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c @@ -81,7 +81,7 @@ int no_lock(void *ctx) { struct task_struct *task, *real_parent; - /* no bpf_rcu_read_lock(), old code still works */ + /* old style ptr_to_btf_id is not allowed in sleepable */ task = bpf_get_current_task_btf(); real_parent = task->real_parent; (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); @@ -286,13 +286,13 @@ out: } SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") -int task_untrusted_non_rcuptr(void *ctx) +int 
task_trusted_non_rcuptr(void *ctx) { struct task_struct *task, *group_leader; task = bpf_get_current_task_btf(); bpf_rcu_read_lock(); - /* the pointer group_leader marked as untrusted */ + /* the pointer group_leader is explicitly marked as trusted */ group_leader = task->real_parent->group_leader; (void)bpf_task_storage_get(&map_a, group_leader, 0, 0); bpf_rcu_read_unlock(); diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 9a326a800e5cf..5702fc9761ef9 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -181,7 +181,7 @@ }, .result_unpriv = REJECT, .result = REJECT, - .errstr = "negative offset ptr_ ptr R1 off=-4 disallowed", + .errstr = "ptr R1 off=-4 disallowed", }, { "calls: invalid kfunc call: PTR_TO_BTF_ID with variable offset", -- GitLab From e768e3c5aab44ee63f58649d4c8cbbb3270e5c06 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 3 Mar 2023 15:15:42 +0100 Subject: [PATCH 0121/2078] bpf: Use separate RCU callbacks for freeing selem Martin suggested that instead of using a byte in the hole (which he has a use for in his future patch) in bpf_local_storage_elem, we can dispatch a different call_rcu callback based on whether we need to free special fields in bpf_local_storage_elem data. The free path, described in commit 9db44fdd8105 ("bpf: Support kptrs in local storage maps"), only waits for call_rcu callbacks when there are special (kptrs, etc.) fields in the map value, hence it is necessary that we only access smap in this case. Therefore, dispatch different RCU callbacks based on the BPF map has a valid btf_record, which dereference and use smap's btf_record only when it is valid. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20230303141542.300068-1-memxor@gmail.com Signed-off-by: Martin KaFai Lau --- include/linux/bpf_local_storage.h | 6 --- kernel/bpf/bpf_local_storage.c | 79 +++++++++++++++++++------------ 2 files changed, 49 insertions(+), 36 deletions(-) diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 0fe92986412b7..6d37a40cd90e8 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -74,12 +74,6 @@ struct bpf_local_storage_elem { struct hlist_node snode; /* Linked to bpf_local_storage */ struct bpf_local_storage __rcu *local_storage; struct rcu_head rcu; - bool can_use_smap; /* Is it safe to access smap in bpf_selem_free_* RCU - * callbacks? bpf_local_storage_map_free only - * executes rcu_barrier when there are special - * fields, this field remembers that to ensure we - * don't access already freed smap in sdata. - */ /* 8 bytes hole */ /* The data is stored in another cacheline to minimize * the number of cachelines access during a cache hit. diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 2bdd722fe2934..3d320393a12c9 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -109,30 +109,36 @@ void bpf_local_storage_free_rcu(struct rcu_head *rcu) kfree_rcu(local_storage, rcu); } -static void bpf_selem_free_rcu(struct rcu_head *rcu) +static void bpf_selem_free_fields_rcu(struct rcu_head *rcu) { struct bpf_local_storage_elem *selem; + struct bpf_local_storage_map *smap; selem = container_of(rcu, struct bpf_local_storage_elem, rcu); - /* The can_use_smap bool is set whenever we need to free additional - * fields in selem data before freeing selem. 
bpf_local_storage_map_free - * only executes rcu_barrier to wait for RCU callbacks when it has - * special fields, hence we can only conditionally dereference smap, as - * by this time the map might have already been freed without waiting - * for our call_rcu callback if it did not have any special fields. - */ - if (selem->can_use_smap) - bpf_obj_free_fields(SDATA(selem)->smap->map.record, SDATA(selem)->data); + /* protected by the rcu_barrier*() */ + smap = rcu_dereference_protected(SDATA(selem)->smap, true); + bpf_obj_free_fields(smap->map.record, SDATA(selem)->data); kfree(selem); } -static void bpf_selem_free_tasks_trace_rcu(struct rcu_head *rcu) +static void bpf_selem_free_fields_trace_rcu(struct rcu_head *rcu) { /* Free directly if Tasks Trace RCU GP also implies RCU GP */ if (rcu_trace_implies_rcu_gp()) - bpf_selem_free_rcu(rcu); + bpf_selem_free_fields_rcu(rcu); + else + call_rcu(rcu, bpf_selem_free_fields_rcu); +} + +static void bpf_selem_free_trace_rcu(struct rcu_head *rcu) +{ + struct bpf_local_storage_elem *selem; + + selem = container_of(rcu, struct bpf_local_storage_elem, rcu); + if (rcu_trace_implies_rcu_gp()) + kfree(selem); else - call_rcu(rcu, bpf_selem_free_rcu); + kfree_rcu(selem, rcu); } /* local_storage->lock must be held and selem->local_storage == local_storage. @@ -145,6 +151,7 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor { struct bpf_local_storage_map *smap; bool free_local_storage; + struct btf_record *rec; void *owner; smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held()); @@ -185,10 +192,26 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor SDATA(selem)) RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL); - if (use_trace_rcu) - call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_tasks_trace_rcu); - else - call_rcu(&selem->rcu, bpf_selem_free_rcu); + /* A different RCU callback is chosen whenever we need to free + * additional fields in selem data before freeing selem. + * bpf_local_storage_map_free only executes rcu_barrier to wait for RCU + * callbacks when it has special fields, hence we can only conditionally + * dereference smap, as by this time the map might have already been + * freed without waiting for our call_rcu callback if it did not have + * any special fields. + */ + rec = smap->map.record; + if (use_trace_rcu) { + if (!IS_ERR_OR_NULL(rec)) + call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_fields_trace_rcu); + else + call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_trace_rcu); + } else { + if (!IS_ERR_OR_NULL(rec)) + call_rcu(&selem->rcu, bpf_selem_free_fields_rcu); + else + kfree_rcu(selem, rcu); + } return free_local_storage; } @@ -256,11 +279,6 @@ void bpf_selem_link_map(struct bpf_local_storage_map *smap, RCU_INIT_POINTER(SDATA(selem)->smap, smap); hlist_add_head_rcu(&selem->map_node, &b->list); raw_spin_unlock_irqrestore(&b->lock, flags); - - /* If our data will have special fields, smap will wait for us to use - * its record in bpf_selem_free_* RCU callbacks before freeing itself. 
- */ - selem->can_use_smap = !IS_ERR_OR_NULL(smap->map.record); } void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu) @@ -748,19 +766,20 @@ void bpf_local_storage_map_free(struct bpf_map *map, kvfree(smap->buckets); /* When local storage has special fields, callbacks for - * bpf_selem_free_rcu and bpf_selem_free_tasks_trace_rcu will keep using - * the map BTF record, we need to execute an RCU barrier to wait for - * them as the record will be freed right after our map_free callback. + * bpf_selem_free_fields_rcu and bpf_selem_free_fields_trace_rcu will + * keep using the map BTF record, we need to execute an RCU barrier to + * wait for them as the record will be freed right after our map_free + * callback. */ if (!IS_ERR_OR_NULL(smap->map.record)) { rcu_barrier_tasks_trace(); /* We cannot skip rcu_barrier() when rcu_trace_implies_rcu_gp() * is true, because while call_rcu invocation is skipped in that - * case in bpf_selem_free_tasks_trace_rcu (and all local storage - * maps pass use_trace_rcu = true), there can be call_rcu - * callbacks based on use_trace_rcu = false in the earlier while - * ((selem = ...)) loop or from bpf_local_storage_unlink_nolock - * called from owner's free path. + * case in bpf_selem_free_fields_trace_rcu (and all local + * storage maps pass use_trace_rcu = true), there can be + * call_rcu callbacks based on use_trace_rcu = false in the + * while ((selem = ...)) loop above or when owner's free path + * calls bpf_local_storage_unlink_nolock. */ rcu_barrier(); } -- GitLab From 0d80a619c113d0e216dbffa56b2d5ccc079ee520 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 4 Mar 2023 03:12:45 +0200 Subject: [PATCH 0122/2078] bpf: allow ctx writes using BPF_ST_MEM instruction Lift verifier restriction to use BPF_ST_MEM instructions to write to context data structures. This requires the following changes: - verifier.c:do_check() for BPF_ST updated to: - no longer forbid writes to registers of type PTR_TO_CTX; - track dst_reg type in the env->insn_aux_data[...].ptr_type field (same way it is done for BPF_STX and BPF_LDX instructions). - verifier.c:convert_ctx_access() and various callbacks invoked by it are updated to handled BPF_ST instruction alongside BPF_STX. 
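For illustration (a sketch, not part of the patch): a store of this shape was previously rejected with "BPF_ST stores into R1 ctx is not allowed" (see the "context stores via ST" case removed from verifier/ctx.c below) and is accepted after this change; the ctx_rewrite selftest added later in this series loads an equivalent BPF_PROG_TYPE_SCHED_CLS program:

    /* *(u32 *)(r1 + offsetof(struct __sk_buff, mark)) = 42;
     * the BPF_ST_MEM store now goes through convert_ctx_access()
     * the same way a BPF_STX_MEM store does.
     */
    struct bpf_insn st_ctx_write[] = {
            BPF_ST_MEM(BPF_W, BPF_REG_1, offsetof(struct __sk_buff, mark), 42),
            BPF_MOV64_IMM(BPF_REG_0, 0),
            BPF_EXIT_INSN(),
    };
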
Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230304011247.566040-2-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/cgroup.c | 49 +++++---- kernel/bpf/verifier.c | 110 ++++++++++----------- net/core/filter.c | 79 ++++++++------- tools/testing/selftests/bpf/verifier/ctx.c | 11 --- 4 files changed, 126 insertions(+), 123 deletions(-) diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index a4ae422b8f122..53edb8ad24717 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -2223,10 +2223,12 @@ static u32 sysctl_convert_ctx_access(enum bpf_access_type type, BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos), treg, si->dst_reg, offsetof(struct bpf_sysctl_kern, ppos)); - *insn++ = BPF_STX_MEM( - BPF_SIZEOF(u32), treg, si->src_reg, + *insn++ = BPF_RAW_INSN( + BPF_CLASS(si->code) | BPF_MEM | BPF_SIZEOF(u32), + treg, si->src_reg, bpf_ctx_narrow_access_offset( - 0, sizeof(u32), sizeof(loff_t))); + 0, sizeof(u32), sizeof(loff_t)), + si->imm); *insn++ = BPF_LDX_MEM( BPF_DW, treg, si->dst_reg, offsetof(struct bpf_sysctl_kern, tmp_reg)); @@ -2376,10 +2378,17 @@ static bool cg_sockopt_is_valid_access(int off, int size, return true; } -#define CG_SOCKOPT_ACCESS_FIELD(T, F) \ - T(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, F), \ - si->dst_reg, si->src_reg, \ - offsetof(struct bpf_sockopt_kern, F)) +#define CG_SOCKOPT_READ_FIELD(F) \ + BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, F), \ + si->dst_reg, si->src_reg, \ + offsetof(struct bpf_sockopt_kern, F)) + +#define CG_SOCKOPT_WRITE_FIELD(F) \ + BPF_RAW_INSN((BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, F) | \ + BPF_MEM | BPF_CLASS(si->code)), \ + si->dst_reg, si->src_reg, \ + offsetof(struct bpf_sockopt_kern, F), \ + si->imm) static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, @@ -2391,25 +2400,25 @@ static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type, switch (si->off) { case offsetof(struct bpf_sockopt, sk): - *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, sk); + *insn++ = CG_SOCKOPT_READ_FIELD(sk); break; case offsetof(struct bpf_sockopt, level): if (type == BPF_WRITE) - *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, level); + *insn++ = CG_SOCKOPT_WRITE_FIELD(level); else - *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, level); + *insn++ = CG_SOCKOPT_READ_FIELD(level); break; case offsetof(struct bpf_sockopt, optname): if (type == BPF_WRITE) - *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, optname); + *insn++ = CG_SOCKOPT_WRITE_FIELD(optname); else - *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optname); + *insn++ = CG_SOCKOPT_READ_FIELD(optname); break; case offsetof(struct bpf_sockopt, optlen): if (type == BPF_WRITE) - *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, optlen); + *insn++ = CG_SOCKOPT_WRITE_FIELD(optlen); else - *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optlen); + *insn++ = CG_SOCKOPT_READ_FIELD(optlen); break; case offsetof(struct bpf_sockopt, retval): BUILD_BUG_ON(offsetof(struct bpf_cg_run_ctx, run_ctx) != 0); @@ -2429,9 +2438,11 @@ static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type, *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx), treg, treg, offsetof(struct task_struct, bpf_ctx)); - *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval), - treg, si->src_reg, - offsetof(struct bpf_cg_run_ctx, retval)); + *insn++ = BPF_RAW_INSN(BPF_CLASS(si->code) | BPF_MEM | + BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval), + treg, si->src_reg, + offsetof(struct bpf_cg_run_ctx, retval), + 
si->imm); *insn++ = BPF_LDX_MEM(BPF_DW, treg, si->dst_reg, offsetof(struct bpf_sockopt_kern, tmp_reg)); } else { @@ -2447,10 +2458,10 @@ static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type, } break; case offsetof(struct bpf_sockopt, optval): - *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval); + *insn++ = CG_SOCKOPT_READ_FIELD(optval); break; case offsetof(struct bpf_sockopt, optval_end): - *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval_end); + *insn++ = CG_SOCKOPT_READ_FIELD(optval_end); break; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c2adf3c24c64a..4c5d2b5e25c8d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -14813,6 +14813,44 @@ static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev) !reg_type_mismatch_ok(prev)); } +static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type, + bool allow_trust_missmatch) +{ + enum bpf_reg_type *prev_type = &env->insn_aux_data[env->insn_idx].ptr_type; + + if (*prev_type == NOT_INIT) { + /* Saw a valid insn + * dst_reg = *(u32 *)(src_reg + off) + * save type to validate intersecting paths + */ + *prev_type = type; + } else if (reg_type_mismatch(type, *prev_type)) { + /* Abuser program is trying to use the same insn + * dst_reg = *(u32*) (src_reg + off) + * with different pointer types: + * src_reg == ctx in one branch and + * src_reg == stack|map in some other branch. + * Reject it. + */ + if (allow_trust_missmatch && + base_type(type) == PTR_TO_BTF_ID && + base_type(*prev_type) == PTR_TO_BTF_ID) { + /* + * Have to support a use case when one path through + * the program yields TRUSTED pointer while another + * is UNTRUSTED. Fallback to UNTRUSTED to generate + * BPF_PROBE_MEM. + */ + *prev_type = PTR_TO_BTF_ID | PTR_UNTRUSTED; + } else { + verbose(env, "same insn cannot be used with different pointers\n"); + return -EINVAL; + } + } + + return 0; +} + static int do_check(struct bpf_verifier_env *env) { bool pop_log = !(env->log.level & BPF_LOG_LEVEL2); @@ -14922,7 +14960,7 @@ static int do_check(struct bpf_verifier_env *env) return err; } else if (class == BPF_LDX) { - enum bpf_reg_type *prev_src_type, src_reg_type; + enum bpf_reg_type src_reg_type; /* check for reserved fields is already done */ @@ -14946,43 +14984,11 @@ static int do_check(struct bpf_verifier_env *env) if (err) return err; - prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type; - - if (*prev_src_type == NOT_INIT) { - /* saw a valid insn - * dst_reg = *(u32 *)(src_reg + off) - * save type to validate intersecting paths - */ - *prev_src_type = src_reg_type; - - } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) { - /* ABuser program is trying to use the same insn - * dst_reg = *(u32*) (src_reg + off) - * with different pointer types: - * src_reg == ctx in one branch and - * src_reg == stack|map in some other branch. - * Reject it. - */ - if (base_type(src_reg_type) == PTR_TO_BTF_ID && - base_type(*prev_src_type) == PTR_TO_BTF_ID) { - /* - * Have to support a use case when one path through - * the program yields TRUSTED pointer while another - * is UNTRUSTED. Fallback to UNTRUSTED to generate - * BPF_PROBE_MEM. 
- */ - *prev_src_type = PTR_TO_BTF_ID | PTR_UNTRUSTED; - } else { - verbose(env, - "The same insn cannot be used with different pointers: %s", - reg_type_str(env, src_reg_type)); - verbose(env, " != %s\n", reg_type_str(env, *prev_src_type)); - return -EINVAL; - } - } - + err = save_aux_ptr_type(env, src_reg_type, true); + if (err) + return err; } else if (class == BPF_STX) { - enum bpf_reg_type *prev_dst_type, dst_reg_type; + enum bpf_reg_type dst_reg_type; if (BPF_MODE(insn->code) == BPF_ATOMIC) { err = check_atomic(env, env->insn_idx, insn); @@ -15015,16 +15021,12 @@ static int do_check(struct bpf_verifier_env *env) if (err) return err; - prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type; - - if (*prev_dst_type == NOT_INIT) { - *prev_dst_type = dst_reg_type; - } else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) { - verbose(env, "same insn cannot be used with different pointers\n"); - return -EINVAL; - } - + err = save_aux_ptr_type(env, dst_reg_type, false); + if (err) + return err; } else if (class == BPF_ST) { + enum bpf_reg_type dst_reg_type; + if (BPF_MODE(insn->code) != BPF_MEM || insn->src_reg != BPF_REG_0) { verbose(env, "BPF_ST uses reserved fields\n"); @@ -15035,12 +15037,7 @@ static int do_check(struct bpf_verifier_env *env) if (err) return err; - if (is_ctx_reg(env, insn->dst_reg)) { - verbose(env, "BPF_ST stores into R%d %s is not allowed\n", - insn->dst_reg, - reg_type_str(env, reg_state(env, insn->dst_reg)->type)); - return -EACCES; - } + dst_reg_type = regs[insn->dst_reg].type; /* check that memory (dst_reg + off) is writeable */ err = check_mem_access(env, env->insn_idx, insn->dst_reg, @@ -15049,6 +15046,9 @@ static int do_check(struct bpf_verifier_env *env) if (err) return err; + err = save_aux_ptr_type(env, dst_reg_type, false); + if (err) + return err; } else if (class == BPF_JMP || class == BPF_JMP32) { u8 opcode = BPF_OP(insn->code); @@ -16157,14 +16157,12 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) for (i = 0; i < insn_cnt; i++, insn++) { bpf_convert_ctx_access_t convert_ctx_access; - bool ctx_access; if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) || insn->code == (BPF_LDX | BPF_MEM | BPF_H) || insn->code == (BPF_LDX | BPF_MEM | BPF_W) || insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) { type = BPF_READ; - ctx_access = true; } else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) || insn->code == (BPF_STX | BPF_MEM | BPF_H) || insn->code == (BPF_STX | BPF_MEM | BPF_W) || @@ -16174,7 +16172,6 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) insn->code == (BPF_ST | BPF_MEM | BPF_W) || insn->code == (BPF_ST | BPF_MEM | BPF_DW)) { type = BPF_WRITE; - ctx_access = BPF_CLASS(insn->code) == BPF_STX; } else { continue; } @@ -16197,9 +16194,6 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) continue; } - if (!ctx_access) - continue; - switch ((int)env->insn_aux_data[i + delta].ptr_type) { case PTR_TO_CTX: if (!ops->convert_ctx_access) diff --git a/net/core/filter.c b/net/core/filter.c index a2dc44e70ea0a..50f649f1b4a90 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -9279,11 +9279,15 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog, #endif /* : skb->tstamp = tstamp */ - *insn++ = BPF_STX_MEM(BPF_DW, skb_reg, value_reg, - offsetof(struct sk_buff, tstamp)); + *insn++ = BPF_RAW_INSN(BPF_CLASS(si->code) | BPF_DW | BPF_MEM, + skb_reg, value_reg, offsetof(struct sk_buff, tstamp), si->imm); return insn; } +#define BPF_EMIT_STORE(size, si, off) \ + 
BPF_RAW_INSN(BPF_CLASS((si)->code) | (size) | BPF_MEM, \ + (si)->dst_reg, (si)->src_reg, (off), (si)->imm) + static u32 bpf_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, @@ -9313,9 +9317,9 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, case offsetof(struct __sk_buff, priority): if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, - bpf_target_off(struct sk_buff, priority, 4, - target_size)); + *insn++ = BPF_EMIT_STORE(BPF_W, si, + bpf_target_off(struct sk_buff, priority, 4, + target_size)); else *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, bpf_target_off(struct sk_buff, priority, 4, @@ -9346,9 +9350,9 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, case offsetof(struct __sk_buff, mark): if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, - bpf_target_off(struct sk_buff, mark, 4, - target_size)); + *insn++ = BPF_EMIT_STORE(BPF_W, si, + bpf_target_off(struct sk_buff, mark, 4, + target_size)); else *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, bpf_target_off(struct sk_buff, mark, 4, @@ -9367,11 +9371,16 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, case offsetof(struct __sk_buff, queue_mapping): if (type == BPF_WRITE) { - *insn++ = BPF_JMP_IMM(BPF_JGE, si->src_reg, NO_QUEUE_MAPPING, 1); - *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg, - bpf_target_off(struct sk_buff, - queue_mapping, - 2, target_size)); + u32 off = bpf_target_off(struct sk_buff, queue_mapping, 2, target_size); + + if (BPF_CLASS(si->code) == BPF_ST && si->imm >= NO_QUEUE_MAPPING) { + *insn++ = BPF_JMP_A(0); /* noop */ + break; + } + + if (BPF_CLASS(si->code) == BPF_STX) + *insn++ = BPF_JMP_IMM(BPF_JGE, si->src_reg, NO_QUEUE_MAPPING, 1); + *insn++ = BPF_EMIT_STORE(BPF_H, si, off); } else { *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, bpf_target_off(struct sk_buff, @@ -9407,8 +9416,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, off += offsetof(struct sk_buff, cb); off += offsetof(struct qdisc_skb_cb, data); if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg, - si->src_reg, off); + *insn++ = BPF_EMIT_STORE(BPF_SIZE(si->code), si, off); else *insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg, si->src_reg, off); @@ -9423,8 +9431,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, off += offsetof(struct qdisc_skb_cb, tc_classid); *target_size = 2; if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, - si->src_reg, off); + *insn++ = BPF_EMIT_STORE(BPF_H, si, off); else *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, off); @@ -9457,9 +9464,9 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, case offsetof(struct __sk_buff, tc_index): #ifdef CONFIG_NET_SCHED if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg, - bpf_target_off(struct sk_buff, tc_index, 2, - target_size)); + *insn++ = BPF_EMIT_STORE(BPF_H, si, + bpf_target_off(struct sk_buff, tc_index, 2, + target_size)); else *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, bpf_target_off(struct sk_buff, tc_index, 2, @@ -9660,8 +9667,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, BUILD_BUG_ON(sizeof_field(struct sock, sk_bound_dev_if) != 4); if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sock, sk_bound_dev_if)); + *insn++ = BPF_EMIT_STORE(BPF_W, si, + offsetof(struct sock, sk_bound_dev_if)); else 
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, offsetof(struct sock, sk_bound_dev_if)); @@ -9671,8 +9678,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, BUILD_BUG_ON(sizeof_field(struct sock, sk_mark) != 4); if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sock, sk_mark)); + *insn++ = BPF_EMIT_STORE(BPF_W, si, + offsetof(struct sock, sk_mark)); else *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, offsetof(struct sock, sk_mark)); @@ -9682,8 +9689,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, BUILD_BUG_ON(sizeof_field(struct sock, sk_priority) != 4); if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sock, sk_priority)); + *insn++ = BPF_EMIT_STORE(BPF_W, si, + offsetof(struct sock, sk_priority)); else *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, offsetof(struct sock, sk_priority)); @@ -9948,10 +9955,12 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type, offsetof(S, TF)); \ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg, \ si->dst_reg, offsetof(S, F)); \ - *insn++ = BPF_STX_MEM(SIZE, tmp_reg, si->src_reg, \ + *insn++ = BPF_RAW_INSN(SIZE | BPF_MEM | BPF_CLASS(si->code), \ + tmp_reg, si->src_reg, \ bpf_target_off(NS, NF, sizeof_field(NS, NF), \ target_size) \ - + OFF); \ + + OFF, \ + si->imm); \ *insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, si->dst_reg, \ offsetof(S, TF)); \ } while (0) @@ -10186,9 +10195,11 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, struct bpf_sock_ops_kern, sk),\ reg, si->dst_reg, \ offsetof(struct bpf_sock_ops_kern, sk));\ - *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD), \ - reg, si->src_reg, \ - offsetof(OBJ, OBJ_FIELD)); \ + *insn++ = BPF_RAW_INSN(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD) | \ + BPF_MEM | BPF_CLASS(si->code), \ + reg, si->src_reg, \ + offsetof(OBJ, OBJ_FIELD), \ + si->imm); \ *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg, \ offsetof(struct bpf_sock_ops_kern, \ temp)); \ @@ -10220,8 +10231,7 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, off -= offsetof(struct bpf_sock_ops, replylong[0]); off += offsetof(struct bpf_sock_ops_kern, replylong[0]); if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, - off); + *insn++ = BPF_EMIT_STORE(BPF_W, si, off); else *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off); @@ -10578,8 +10588,7 @@ static u32 sk_skb_convert_ctx_access(enum bpf_access_type type, off += offsetof(struct sk_buff, cb); off += offsetof(struct sk_skb_cb, data); if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg, - si->src_reg, off); + *insn++ = BPF_EMIT_STORE(BPF_SIZE(si->code), si, off); else *insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg, si->src_reg, off); diff --git a/tools/testing/selftests/bpf/verifier/ctx.c b/tools/testing/selftests/bpf/verifier/ctx.c index c8eaf0536c24b..2fd31612c0b8e 100644 --- a/tools/testing/selftests/bpf/verifier/ctx.c +++ b/tools/testing/selftests/bpf/verifier/ctx.c @@ -1,14 +1,3 @@ -{ - "context stores via ST", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_ST_MEM(BPF_DW, BPF_REG_1, offsetof(struct __sk_buff, mark), 0), - BPF_EXIT_INSN(), - }, - .errstr = "BPF_ST stores into R1 ctx is not allowed", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, { "context stores via BPF_ATOMIC", .insns = { -- GitLab From 806f81cd1ee30c66a3d2a4cd18b13c97429397a0 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 4 Mar 2023 03:12:46 
+0200 Subject: [PATCH 0123/2078] selftests/bpf: test if pointer type is tracked for BPF_ST_MEM Check that verifier tracks pointer types for BPF_ST_MEM instructions and reports error if pointer types do not match for different execution branches. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230304011247.566040-3-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/verifier/unpriv.c | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c index 878ca26c3f0a4..af0c0f3366252 100644 --- a/tools/testing/selftests/bpf/verifier/unpriv.c +++ b/tools/testing/selftests/bpf/verifier/unpriv.c @@ -239,6 +239,29 @@ .errstr = "same insn cannot be used with different pointers", .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, +{ + /* Same as above, but use BPF_ST_MEM to save 42 + * instead of BPF_STX_MEM. + */ + "unpriv: spill/fill of different pointers st", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), + BPF_ST_MEM(BPF_W, BPF_REG_1, offsetof(struct __sk_buff, mark), 42), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "same insn cannot be used with different pointers", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, { "unpriv: spill/fill of different pointers stx - ctx and sock", .insns = { -- GitLab From 71cf4d027ad53a1e2847191ac14e50132d35a6a7 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 4 Mar 2023 03:12:47 +0200 Subject: [PATCH 0124/2078] selftests/bpf: Disassembler tests for verifier.c:convert_ctx_access() Function verifier.c:convert_ctx_access() applies some rewrites to BPF instructions that read or write BPF program context. This commit adds machinery to allow test cases that inspect BPF program after these rewrites are applied. An example of a test case: { // Shorthand for field offset and size specification N(CGROUP_SOCKOPT, struct bpf_sockopt, retval), // Pattern generated for field read .read = "$dst = *(u64 *)($ctx + bpf_sockopt_kern::current_task);" "$dst = *(u64 *)($dst + task_struct::bpf_ctx);" "$dst = *(u32 *)($dst + bpf_cg_run_ctx::retval);", // Pattern generated for field write .write = "*(u64 *)($ctx + bpf_sockopt_kern::tmp_reg) = r9;" "r9 = *(u64 *)($ctx + bpf_sockopt_kern::current_task);" "r9 = *(u64 *)(r9 + task_struct::bpf_ctx);" "*(u32 *)(r9 + bpf_cg_run_ctx::retval) = $src;" "r9 = *(u64 *)($ctx + bpf_sockopt_kern::tmp_reg);" , }, For each test case, up to three programs are created: - One that uses BPF_LDX_MEM to read the context field. - One that uses BPF_STX_MEM to write to the context field. - One that uses BPF_ST_MEM to write to the context field. The disassembly of each program is compared with the pattern specified in the test case. Kernel code for disassembly is reused (as is in the bpftool). To keep Makefile changes to the minimum, symbolic links to `kernel/bpf/disasm.c` and `kernel/bpf/disasm.h ` are added. 
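To make the above concrete, this is roughly what the harness builds for one test case (a simplified sketch of run_one_testcase() from the diff below; the access size and offset come from the test_case entry, and $ctx/$dst in the patterns are bound to r1/r2):

    /* Read probe: its xlated disassembly is matched against .read */
    struct bpf_insn ldx_prog[] = {
            BPF_LDX_MEM(bpf_sz, BPF_REG_2, BPF_REG_1, test->field_offset),
            BPF_MOV64_IMM(BPF_REG_0, 0),
            BPF_EXIT_INSN(),
    };
    /* The write probes are analogous: they store to the same field with
     * BPF_STX_MEM or BPF_ST_MEM and are matched against the .write,
     * .write_stx and .write_st patterns, with $src bound to r2.
     */
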
Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230304011247.566040-4-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 2 +- tools/testing/selftests/bpf/disasm.c | 1 + tools/testing/selftests/bpf/disasm.h | 1 + .../selftests/bpf/prog_tests/ctx_rewrite.c | 917 ++++++++++++++++++ 4 files changed, 920 insertions(+), 1 deletion(-) create mode 120000 tools/testing/selftests/bpf/disasm.c create mode 120000 tools/testing/selftests/bpf/disasm.h create mode 100644 tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index eab3cf5399f52..16f404aa1b237 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -559,7 +559,7 @@ TRUNNER_BPF_PROGS_DIR := progs TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \ network_helpers.c testing_helpers.c \ btf_helpers.c flow_dissector_load.h \ - cap_helpers.c test_loader.c xsk.c + cap_helpers.c test_loader.c xsk.c disasm.c TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ $(OUTPUT)/liburandom_read.so \ $(OUTPUT)/xdp_synproxy \ diff --git a/tools/testing/selftests/bpf/disasm.c b/tools/testing/selftests/bpf/disasm.c new file mode 120000 index 0000000000000..b1571927bd54b --- /dev/null +++ b/tools/testing/selftests/bpf/disasm.c @@ -0,0 +1 @@ +../../../../kernel/bpf/disasm.c \ No newline at end of file diff --git a/tools/testing/selftests/bpf/disasm.h b/tools/testing/selftests/bpf/disasm.h new file mode 120000 index 0000000000000..8054fd4973401 --- /dev/null +++ b/tools/testing/selftests/bpf/disasm.h @@ -0,0 +1 @@ +../../../../kernel/bpf/disasm.h \ No newline at end of file diff --git a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c new file mode 100644 index 0000000000000..d5fe3d4b936c7 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c @@ -0,0 +1,917 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include + +#include "bpf/btf.h" +#include "bpf_util.h" +#include "linux/filter.h" +#include "disasm.h" + +#define MAX_PROG_TEXT_SZ (32 * 1024) + +/* The code in this file serves the sole purpose of executing test cases + * specified in the test_cases array. Each test case specifies a program + * type, context field offset, and disassembly patterns that correspond + * to read and write instructions generated by + * verifier.c:convert_ctx_access() for accessing that field. + * + * For each test case, up to three programs are created: + * - One that uses BPF_LDX_MEM to read the context field. + * - One that uses BPF_STX_MEM to write to the context field. + * - One that uses BPF_ST_MEM to write to the context field. + * + * The disassembly of each program is then compared with the pattern + * specified in the test case. + */ +struct test_case { + char *name; + enum bpf_prog_type prog_type; + enum bpf_attach_type expected_attach_type; + int field_offset; + int field_sz; + /* Program generated for BPF_ST_MEM uses value 42 by default, + * this field allows to specify custom value. 
+ */ + struct { + bool use; + int value; + } st_value; + /* Pattern for BPF_LDX_MEM(field_sz, dst, ctx, field_offset) */ + char *read; + /* Pattern for BPF_STX_MEM(field_sz, ctx, src, field_offset) and + * BPF_ST_MEM (field_sz, ctx, src, field_offset) + */ + char *write; + /* Pattern for BPF_ST_MEM(field_sz, ctx, src, field_offset), + * takes priority over `write`. + */ + char *write_st; + /* Pattern for BPF_STX_MEM (field_sz, ctx, src, field_offset), + * takes priority over `write`. + */ + char *write_stx; +}; + +#define N(_prog_type, type, field, name_extra...) \ + .name = #_prog_type "." #field name_extra, \ + .prog_type = BPF_PROG_TYPE_##_prog_type, \ + .field_offset = offsetof(type, field), \ + .field_sz = sizeof(typeof(((type *)NULL)->field)) + +static struct test_case test_cases[] = { +/* Sign extension on s390 changes the pattern */ +#if defined(__x86_64__) || defined(__aarch64__) + { + N(SCHED_CLS, struct __sk_buff, tstamp), + .read = "r11 = *(u8 *)($ctx + sk_buff::__pkt_vlan_present_offset);" + "w11 &= 160;" + "if w11 != 0xa0 goto pc+2;" + "$dst = 0;" + "goto pc+1;" + "$dst = *(u64 *)($ctx + sk_buff::tstamp);", + .write = "r11 = *(u8 *)($ctx + sk_buff::__pkt_vlan_present_offset);" + "if w11 & 0x80 goto pc+1;" + "goto pc+2;" + "w11 &= -33;" + "*(u8 *)($ctx + sk_buff::__pkt_vlan_present_offset) = r11;" + "*(u64 *)($ctx + sk_buff::tstamp) = $src;", + }, +#endif + { + N(SCHED_CLS, struct __sk_buff, priority), + .read = "$dst = *(u32 *)($ctx + sk_buff::priority);", + .write = "*(u32 *)($ctx + sk_buff::priority) = $src;", + }, + { + N(SCHED_CLS, struct __sk_buff, mark), + .read = "$dst = *(u32 *)($ctx + sk_buff::mark);", + .write = "*(u32 *)($ctx + sk_buff::mark) = $src;", + }, + { + N(SCHED_CLS, struct __sk_buff, cb[0]), + .read = "$dst = *(u32 *)($ctx + $(sk_buff::cb + qdisc_skb_cb::data));", + .write = "*(u32 *)($ctx + $(sk_buff::cb + qdisc_skb_cb::data)) = $src;", + }, + { + N(SCHED_CLS, struct __sk_buff, tc_classid), + .read = "$dst = *(u16 *)($ctx + $(sk_buff::cb + qdisc_skb_cb::tc_classid));", + .write = "*(u16 *)($ctx + $(sk_buff::cb + qdisc_skb_cb::tc_classid)) = $src;", + }, + { + N(SCHED_CLS, struct __sk_buff, tc_index), + .read = "$dst = *(u16 *)($ctx + sk_buff::tc_index);", + .write = "*(u16 *)($ctx + sk_buff::tc_index) = $src;", + }, + { + N(SCHED_CLS, struct __sk_buff, queue_mapping), + .read = "$dst = *(u16 *)($ctx + sk_buff::queue_mapping);", + .write_stx = "if $src >= 0xffff goto pc+1;" + "*(u16 *)($ctx + sk_buff::queue_mapping) = $src;", + .write_st = "*(u16 *)($ctx + sk_buff::queue_mapping) = $src;", + }, + { + /* This is a corner case in filter.c:bpf_convert_ctx_access() */ + N(SCHED_CLS, struct __sk_buff, queue_mapping, ".ushrt_max"), + .st_value = { true, USHRT_MAX }, + .write_st = "goto pc+0;", + }, + { + N(CGROUP_SOCK, struct bpf_sock, bound_dev_if), + .read = "$dst = *(u32 *)($ctx + sock_common::skc_bound_dev_if);", + .write = "*(u32 *)($ctx + sock_common::skc_bound_dev_if) = $src;", + }, + { + N(CGROUP_SOCK, struct bpf_sock, mark), + .read = "$dst = *(u32 *)($ctx + sock::sk_mark);", + .write = "*(u32 *)($ctx + sock::sk_mark) = $src;", + }, + { + N(CGROUP_SOCK, struct bpf_sock, priority), + .read = "$dst = *(u32 *)($ctx + sock::sk_priority);", + .write = "*(u32 *)($ctx + sock::sk_priority) = $src;", + }, + { + N(SOCK_OPS, struct bpf_sock_ops, replylong[0]), + .read = "$dst = *(u32 *)($ctx + bpf_sock_ops_kern::replylong);", + .write = "*(u32 *)($ctx + bpf_sock_ops_kern::replylong) = $src;", + }, + { + N(CGROUP_SYSCTL, struct bpf_sysctl, file_pos), +#if 
__BYTE_ORDER == __LITTLE_ENDIAN + .read = "$dst = *(u64 *)($ctx + bpf_sysctl_kern::ppos);" + "$dst = *(u32 *)($dst +0);", + .write = "*(u64 *)($ctx + bpf_sysctl_kern::tmp_reg) = r9;" + "r9 = *(u64 *)($ctx + bpf_sysctl_kern::ppos);" + "*(u32 *)(r9 +0) = $src;" + "r9 = *(u64 *)($ctx + bpf_sysctl_kern::tmp_reg);", +#else + .read = "$dst = *(u64 *)($ctx + bpf_sysctl_kern::ppos);" + "$dst = *(u32 *)($dst +4);", + .write = "*(u64 *)($ctx + bpf_sysctl_kern::tmp_reg) = r9;" + "r9 = *(u64 *)($ctx + bpf_sysctl_kern::ppos);" + "*(u32 *)(r9 +4) = $src;" + "r9 = *(u64 *)($ctx + bpf_sysctl_kern::tmp_reg);", +#endif + }, + { + N(CGROUP_SOCKOPT, struct bpf_sockopt, sk), + .read = "$dst = *(u64 *)($ctx + bpf_sockopt_kern::sk);", + .expected_attach_type = BPF_CGROUP_GETSOCKOPT, + }, + { + N(CGROUP_SOCKOPT, struct bpf_sockopt, level), + .read = "$dst = *(u32 *)($ctx + bpf_sockopt_kern::level);", + .write = "*(u32 *)($ctx + bpf_sockopt_kern::level) = $src;", + .expected_attach_type = BPF_CGROUP_SETSOCKOPT, + }, + { + N(CGROUP_SOCKOPT, struct bpf_sockopt, optname), + .read = "$dst = *(u32 *)($ctx + bpf_sockopt_kern::optname);", + .write = "*(u32 *)($ctx + bpf_sockopt_kern::optname) = $src;", + .expected_attach_type = BPF_CGROUP_SETSOCKOPT, + }, + { + N(CGROUP_SOCKOPT, struct bpf_sockopt, optlen), + .read = "$dst = *(u32 *)($ctx + bpf_sockopt_kern::optlen);", + .write = "*(u32 *)($ctx + bpf_sockopt_kern::optlen) = $src;", + .expected_attach_type = BPF_CGROUP_SETSOCKOPT, + }, + { + N(CGROUP_SOCKOPT, struct bpf_sockopt, retval), + .read = "$dst = *(u64 *)($ctx + bpf_sockopt_kern::current_task);" + "$dst = *(u64 *)($dst + task_struct::bpf_ctx);" + "$dst = *(u32 *)($dst + bpf_cg_run_ctx::retval);", + .write = "*(u64 *)($ctx + bpf_sockopt_kern::tmp_reg) = r9;" + "r9 = *(u64 *)($ctx + bpf_sockopt_kern::current_task);" + "r9 = *(u64 *)(r9 + task_struct::bpf_ctx);" + "*(u32 *)(r9 + bpf_cg_run_ctx::retval) = $src;" + "r9 = *(u64 *)($ctx + bpf_sockopt_kern::tmp_reg);", + .expected_attach_type = BPF_CGROUP_GETSOCKOPT, + }, + { + N(CGROUP_SOCKOPT, struct bpf_sockopt, optval), + .read = "$dst = *(u64 *)($ctx + bpf_sockopt_kern::optval);", + .expected_attach_type = BPF_CGROUP_GETSOCKOPT, + }, + { + N(CGROUP_SOCKOPT, struct bpf_sockopt, optval_end), + .read = "$dst = *(u64 *)($ctx + bpf_sockopt_kern::optval_end);", + .expected_attach_type = BPF_CGROUP_GETSOCKOPT, + }, +}; + +#undef N + +static regex_t *ident_regex; +static regex_t *field_regex; + +static char *skip_space(char *str) +{ + while (*str && isspace(*str)) + ++str; + return str; +} + +static char *skip_space_and_semi(char *str) +{ + while (*str && (isspace(*str) || *str == ';')) + ++str; + return str; +} + +static char *match_str(char *str, char *prefix) +{ + while (*str && *prefix && *str == *prefix) { + ++str; + ++prefix; + } + if (*prefix) + return NULL; + return str; +} + +static char *match_number(char *str, int num) +{ + char *next; + int snum = strtol(str, &next, 10); + + if (next - str == 0 || num != snum) + return NULL; + + return next; +} + +static int find_field_offset_aux(struct btf *btf, int btf_id, char *field_name, int off) +{ + const struct btf_type *type = btf__type_by_id(btf, btf_id); + const struct btf_member *m; + __u16 mnum; + int i; + + if (!type) { + PRINT_FAIL("Can't find btf_type for id %d\n", btf_id); + return -1; + } + + if (!btf_is_struct(type) && !btf_is_union(type)) { + PRINT_FAIL("BTF id %d is not struct or union\n", btf_id); + return -1; + } + + m = btf_members(type); + mnum = btf_vlen(type); + + for (i = 0; i < mnum; ++i, ++m) { + 
const char *mname = btf__name_by_offset(btf, m->name_off); + + if (strcmp(mname, "") == 0) { + int msize = find_field_offset_aux(btf, m->type, field_name, + off + m->offset); + if (msize >= 0) + return msize; + } + + if (strcmp(mname, field_name)) + continue; + + return (off + m->offset) / 8; + } + + return -1; +} + +static int find_field_offset(struct btf *btf, char *pattern, regmatch_t *matches) +{ + int type_sz = matches[1].rm_eo - matches[1].rm_so; + int field_sz = matches[2].rm_eo - matches[2].rm_so; + char *type = pattern + matches[1].rm_so; + char *field = pattern + matches[2].rm_so; + char field_str[128] = {}; + char type_str[128] = {}; + int btf_id, field_offset; + + if (type_sz >= sizeof(type_str)) { + PRINT_FAIL("Malformed pattern: type ident is too long: %d\n", type_sz); + return -1; + } + + if (field_sz >= sizeof(field_str)) { + PRINT_FAIL("Malformed pattern: field ident is too long: %d\n", field_sz); + return -1; + } + + strncpy(type_str, type, type_sz); + strncpy(field_str, field, field_sz); + btf_id = btf__find_by_name(btf, type_str); + if (btf_id < 0) { + PRINT_FAIL("No BTF info for type %s\n", type_str); + return -1; + } + + field_offset = find_field_offset_aux(btf, btf_id, field_str, 0); + if (field_offset < 0) { + PRINT_FAIL("No BTF info for field %s::%s\n", type_str, field_str); + return -1; + } + + return field_offset; +} + +static regex_t *compile_regex(char *pat) +{ + regex_t *re; + int err; + + re = malloc(sizeof(regex_t)); + if (!re) { + PRINT_FAIL("Can't alloc regex\n"); + return NULL; + } + + err = regcomp(re, pat, REG_EXTENDED); + if (err) { + char errbuf[512]; + + regerror(err, re, errbuf, sizeof(errbuf)); + PRINT_FAIL("Can't compile regex: %s\n", errbuf); + free(re); + return NULL; + } + + return re; +} + +static void free_regex(regex_t *re) +{ + if (!re) + return; + + regfree(re); + free(re); +} + +static u32 max_line_len(char *str) +{ + u32 max_line = 0; + char *next = str; + + while (next) { + next = strchr(str, '\n'); + if (next) { + max_line = max_t(u32, max_line, (next - str)); + str = next + 1; + } else { + max_line = max_t(u32, max_line, strlen(str)); + } + } + + return min(max_line, 60u); +} + +/* Print strings `pattern_origin` and `text_origin` side by side, + * assume `pattern_pos` and `text_pos` designate location within + * corresponding origin string where match diverges. 
+ * The output should look like: + * + * Can't match disassembly(left) with pattern(right): + * r2 = *(u64 *)(r1 +0) ; $dst = *(u64 *)($ctx + bpf_sockopt_kern::sk1) + * ^ ^ + * r0 = 0 ; + * exit ; + */ +static void print_match_error(FILE *out, + char *pattern_origin, char *text_origin, + char *pattern_pos, char *text_pos) +{ + char *pattern = pattern_origin; + char *text = text_origin; + int middle = max_line_len(text) + 2; + + fprintf(out, "Can't match disassembly(left) with pattern(right):\n"); + while (*pattern || *text) { + int column = 0; + int mark1 = -1; + int mark2 = -1; + + /* Print one line from text */ + while (*text && *text != '\n') { + if (text == text_pos) + mark1 = column; + fputc(*text, out); + ++text; + ++column; + } + if (text == text_pos) + mark1 = column; + + /* Pad to the middle */ + while (column < middle) { + fputc(' ', out); + ++column; + } + fputs("; ", out); + column += 3; + + /* Print one line from pattern, pattern lines are terminated by ';' */ + while (*pattern && *pattern != ';') { + if (pattern == pattern_pos) + mark2 = column; + fputc(*pattern, out); + ++pattern; + ++column; + } + if (pattern == pattern_pos) + mark2 = column; + + fputc('\n', out); + if (*pattern) + ++pattern; + if (*text) + ++text; + + /* If pattern and text diverge at this line, print an + * additional line with '^' marks, highlighting + * positions where match fails. + */ + if (mark1 > 0 || mark2 > 0) { + for (column = 0; column <= max(mark1, mark2); ++column) { + if (column == mark1 || column == mark2) + fputc('^', out); + else + fputc(' ', out); + } + fputc('\n', out); + } + } +} + +/* Test if `text` matches `pattern`. Pattern consists of the following elements: + * + * - Field offset references: + * + * :: + * + * When such reference is encountered BTF is used to compute numerical + * value for the offset of in . The `text` is expected to + * contain matching numerical value. + * + * - Field groups: + * + * $(:: [+ ::]*) + * + * Allows to specify an offset that is a sum of multiple field offsets. + * The `text` is expected to contain matching numerical value. + * + * - Variable references, e.g. `$src`, `$dst`, `$ctx`. + * These are substitutions specified in `reg_map` array. + * If a substring of pattern is equal to `reg_map[i][0]` the `text` is + * expected to contain `reg_map[i][1]` in the matching position. + * + * - Whitespace is ignored, ';' counts as whitespace for `pattern`. + * + * - Any other characters, `pattern` and `text` should match one-to-one. 
+ * + * Example of a pattern: + * + * __________ fields group ________________ + * ' ' + * *(u16 *)($ctx + $(sk_buff::cb + qdisc_skb_cb::tc_classid)) = $src; + * ^^^^ '______________________' + * variable reference field offset reference + */ +static bool match_pattern(struct btf *btf, char *pattern, char *text, char *reg_map[][2]) +{ + char *pattern_origin = pattern; + char *text_origin = text; + regmatch_t matches[3]; + +_continue: + while (*pattern) { + if (!*text) + goto err; + + /* Skip whitespace */ + if (isspace(*pattern) || *pattern == ';') { + if (!isspace(*text) && text != text_origin && isalnum(text[-1])) + goto err; + pattern = skip_space_and_semi(pattern); + text = skip_space(text); + continue; + } + + /* Check for variable references */ + for (int i = 0; reg_map[i][0]; ++i) { + char *pattern_next, *text_next; + + pattern_next = match_str(pattern, reg_map[i][0]); + if (!pattern_next) + continue; + + text_next = match_str(text, reg_map[i][1]); + if (!text_next) + goto err; + + pattern = pattern_next; + text = text_next; + goto _continue; + } + + /* Match field group: + * $(sk_buff::cb + qdisc_skb_cb::tc_classid) + */ + if (strncmp(pattern, "$(", 2) == 0) { + char *group_start = pattern, *text_next; + int acc_offset = 0; + + pattern += 2; + + for (;;) { + int field_offset; + + pattern = skip_space(pattern); + if (!*pattern) { + PRINT_FAIL("Unexpected end of pattern\n"); + goto err; + } + + if (*pattern == ')') { + ++pattern; + break; + } + + if (*pattern == '+') { + ++pattern; + continue; + } + + printf("pattern: %s\n", pattern); + if (regexec(field_regex, pattern, 3, matches, 0) != 0) { + PRINT_FAIL("Field reference expected\n"); + goto err; + } + + field_offset = find_field_offset(btf, pattern, matches); + if (field_offset < 0) + goto err; + + pattern += matches[0].rm_eo; + acc_offset += field_offset; + } + + text_next = match_number(text, acc_offset); + if (!text_next) { + PRINT_FAIL("No match for group offset %.*s (%d)\n", + (int)(pattern - group_start), + group_start, + acc_offset); + goto err; + } + text = text_next; + } + + /* Match field reference: + * sk_buff::cb + */ + if (regexec(field_regex, pattern, 3, matches, 0) == 0) { + int field_offset; + char *text_next; + + field_offset = find_field_offset(btf, pattern, matches); + if (field_offset < 0) + goto err; + + text_next = match_number(text, field_offset); + if (!text_next) { + PRINT_FAIL("No match for field offset %.*s (%d)\n", + (int)matches[0].rm_eo, pattern, field_offset); + goto err; + } + + pattern += matches[0].rm_eo; + text = text_next; + continue; + } + + /* If pattern points to identifier not followed by '::' + * skip the identifier to avoid n^2 application of the + * field reference rule. + */ + if (regexec(ident_regex, pattern, 1, matches, 0) == 0) { + if (strncmp(pattern, text, matches[0].rm_eo) != 0) + goto err; + + pattern += matches[0].rm_eo; + text += matches[0].rm_eo; + continue; + } + + /* Match literally */ + if (*pattern != *text) + goto err; + + ++pattern; + ++text; + } + + return true; + +err: + test__fail(); + print_match_error(stdout, pattern_origin, text_origin, pattern, text); + return false; +} + +/* Request BPF program instructions after all rewrites are applied, + * e.g. verifier.c:convert_ctx_access() is done. 
+ */ +static int get_xlated_program(int fd_prog, struct bpf_insn **buf, __u32 *cnt) +{ + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + __u32 xlated_prog_len; + __u32 buf_element_size = sizeof(struct bpf_insn); + + if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) { + perror("bpf_prog_get_info_by_fd failed"); + return -1; + } + + xlated_prog_len = info.xlated_prog_len; + if (xlated_prog_len % buf_element_size) { + printf("Program length %d is not multiple of %d\n", + xlated_prog_len, buf_element_size); + return -1; + } + + *cnt = xlated_prog_len / buf_element_size; + *buf = calloc(*cnt, buf_element_size); + if (!buf) { + perror("can't allocate xlated program buffer"); + return -ENOMEM; + } + + bzero(&info, sizeof(info)); + info.xlated_prog_len = xlated_prog_len; + info.xlated_prog_insns = (__u64)(unsigned long)*buf; + if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) { + perror("second bpf_prog_get_info_by_fd failed"); + goto out_free_buf; + } + + return 0; + +out_free_buf: + free(*buf); + return -1; +} + +static void print_insn(void *private_data, const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + vfprintf((FILE *)private_data, fmt, args); + va_end(args); +} + +/* Disassemble instructions to a stream */ +static void print_xlated(FILE *out, struct bpf_insn *insn, __u32 len) +{ + const struct bpf_insn_cbs cbs = { + .cb_print = print_insn, + .cb_call = NULL, + .cb_imm = NULL, + .private_data = out, + }; + bool double_insn = false; + int i; + + for (i = 0; i < len; i++) { + if (double_insn) { + double_insn = false; + continue; + } + + double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW); + print_bpf_insn(&cbs, insn + i, true); + } +} + +/* We share code with kernel BPF disassembler, it adds '(FF) ' prefix + * for each instruction (FF stands for instruction `code` byte). + * This function removes the prefix inplace for each line in `str`. 
+ */ +static void remove_insn_prefix(char *str, int size) +{ + const int prefix_size = 5; + + int write_pos = 0, read_pos = prefix_size; + int len = strlen(str); + char c; + + size = min(size, len); + + while (read_pos < size) { + c = str[read_pos++]; + if (c == 0) + break; + str[write_pos++] = c; + if (c == '\n') + read_pos += prefix_size; + } + str[write_pos] = 0; +} + +struct prog_info { + char *prog_kind; + enum bpf_prog_type prog_type; + enum bpf_attach_type expected_attach_type; + struct bpf_insn *prog; + u32 prog_len; +}; + +static void match_program(struct btf *btf, + struct prog_info *pinfo, + char *pattern, + char *reg_map[][2], + bool skip_first_insn) +{ + struct bpf_insn *buf = NULL; + int err = 0, prog_fd = 0; + FILE *prog_out = NULL; + char *text = NULL; + __u32 cnt = 0; + + text = calloc(MAX_PROG_TEXT_SZ, 1); + if (!text) { + PRINT_FAIL("Can't allocate %d bytes\n", MAX_PROG_TEXT_SZ); + goto out; + } + + // TODO: log level + LIBBPF_OPTS(bpf_prog_load_opts, opts); + opts.log_buf = text; + opts.log_size = MAX_PROG_TEXT_SZ; + opts.log_level = 1 | 2 | 4; + opts.expected_attach_type = pinfo->expected_attach_type; + + prog_fd = bpf_prog_load(pinfo->prog_type, NULL, "GPL", + pinfo->prog, pinfo->prog_len, &opts); + if (prog_fd < 0) { + PRINT_FAIL("Can't load program, errno %d (%s), verifier log:\n%s\n", + errno, strerror(errno), text); + goto out; + } + + memset(text, 0, MAX_PROG_TEXT_SZ); + + err = get_xlated_program(prog_fd, &buf, &cnt); + if (err) { + PRINT_FAIL("Can't load back BPF program\n"); + goto out; + } + + prog_out = fmemopen(text, MAX_PROG_TEXT_SZ - 1, "w"); + if (!prog_out) { + PRINT_FAIL("Can't open memory stream\n"); + goto out; + } + if (skip_first_insn) + print_xlated(prog_out, buf + 1, cnt - 1); + else + print_xlated(prog_out, buf, cnt); + fclose(prog_out); + remove_insn_prefix(text, MAX_PROG_TEXT_SZ); + + ASSERT_TRUE(match_pattern(btf, pattern, text, reg_map), + pinfo->prog_kind); + +out: + if (prog_fd) + close(prog_fd); + free(buf); + free(text); +} + +static void run_one_testcase(struct btf *btf, struct test_case *test) +{ + struct prog_info pinfo = {}; + int bpf_sz; + + if (!test__start_subtest(test->name)) + return; + + switch (test->field_sz) { + case 8: + bpf_sz = BPF_DW; + break; + case 4: + bpf_sz = BPF_W; + break; + case 2: + bpf_sz = BPF_H; + break; + case 1: + bpf_sz = BPF_B; + break; + default: + PRINT_FAIL("Unexpected field size: %d, want 8,4,2 or 1\n", test->field_sz); + return; + } + + pinfo.prog_type = test->prog_type; + pinfo.expected_attach_type = test->expected_attach_type; + + if (test->read) { + struct bpf_insn ldx_prog[] = { + BPF_LDX_MEM(bpf_sz, BPF_REG_2, BPF_REG_1, test->field_offset), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + char *reg_map[][2] = { + { "$ctx", "r1" }, + { "$dst", "r2" }, + {} + }; + + pinfo.prog_kind = "LDX"; + pinfo.prog = ldx_prog; + pinfo.prog_len = ARRAY_SIZE(ldx_prog); + match_program(btf, &pinfo, test->read, reg_map, false); + } + + if (test->write || test->write_st || test->write_stx) { + struct bpf_insn stx_prog[] = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_STX_MEM(bpf_sz, BPF_REG_1, BPF_REG_2, test->field_offset), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + char *stx_reg_map[][2] = { + { "$ctx", "r1" }, + { "$src", "r2" }, + {} + }; + struct bpf_insn st_prog[] = { + BPF_ST_MEM(bpf_sz, BPF_REG_1, test->field_offset, + test->st_value.use ? 
test->st_value.value : 42), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + char *st_reg_map[][2] = { + { "$ctx", "r1" }, + { "$src", "42" }, + {} + }; + + if (test->write || test->write_stx) { + char *pattern = test->write_stx ? test->write_stx : test->write; + + pinfo.prog_kind = "STX"; + pinfo.prog = stx_prog; + pinfo.prog_len = ARRAY_SIZE(stx_prog); + match_program(btf, &pinfo, pattern, stx_reg_map, true); + } + + if (test->write || test->write_st) { + char *pattern = test->write_st ? test->write_st : test->write; + + pinfo.prog_kind = "ST"; + pinfo.prog = st_prog; + pinfo.prog_len = ARRAY_SIZE(st_prog); + match_program(btf, &pinfo, pattern, st_reg_map, false); + } + } + + test__end_subtest(); +} + +void test_ctx_rewrite(void) +{ + struct btf *btf; + int i; + + field_regex = compile_regex("^([[:alpha:]_][[:alnum:]_]+)::([[:alpha:]_][[:alnum:]_]+)"); + ident_regex = compile_regex("^[[:alpha:]_][[:alnum:]_]+"); + if (!field_regex || !ident_regex) + return; + + btf = btf__load_vmlinux_btf(); + if (!btf) { + PRINT_FAIL("Can't load vmlinux BTF, errno %d (%s)\n", errno, strerror(errno)); + goto out; + } + + for (i = 0; i < ARRAY_SIZE(test_cases); ++i) + run_one_testcase(btf, &test_cases[i]); + +out: + btf__free(btf); + free_regex(field_regex); + free_regex(ident_regex); +} -- GitLab From d54e0f6c1adffbf72f2cf4aebe6122899c3b851c Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 2 Mar 2023 15:49:59 -0800 Subject: [PATCH 0125/2078] bpf: improve stack slot state printing Improve stack slot state printing to provide more useful and relevant information, especially for dynptrs. While previously we'd see something like: 8: (85) call bpf_ringbuf_reserve_dynptr#198 ; R0_w=scalar() fp-8_w=dddddddd fp-16_w=dddddddd refs=2 Now we'll see way more useful: 8: (85) call bpf_ringbuf_reserve_dynptr#198 ; R0_w=scalar() fp-16_w=dynptr_ringbuf(ref_id=2) refs=2 I experimented with printing the range of slots taken by dynptr, something like: fp-16..8_w=dynptr_ringbuf(ref_id=2) But it felt very awkward and pretty useless. So we print the lowest address (most negative offset) only. The general structure of this code is now also set up for easier extension and will accommodate ITER slots naturally. 
Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230302235015.2044271-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 75 ++++++++++++++++++++++++++++--------------- 1 file changed, 49 insertions(+), 26 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 4c5d2b5e25c8d..4f71b6b61ef4e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -705,6 +705,25 @@ static const char *kernel_type_name(const struct btf* btf, u32 id) return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off); } +static const char *dynptr_type_str(enum bpf_dynptr_type type) +{ + switch (type) { + case BPF_DYNPTR_TYPE_LOCAL: + return "local"; + case BPF_DYNPTR_TYPE_RINGBUF: + return "ringbuf"; + case BPF_DYNPTR_TYPE_SKB: + return "skb"; + case BPF_DYNPTR_TYPE_XDP: + return "xdp"; + case BPF_DYNPTR_TYPE_INVALID: + return ""; + default: + WARN_ONCE(1, "unknown dynptr type %d\n", type); + return ""; + } +} + static void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno) { env->scratched_regs |= 1U << regno; @@ -1176,26 +1195,49 @@ static void print_verifier_state(struct bpf_verifier_env *env, for (j = 0; j < BPF_REG_SIZE; j++) { if (state->stack[i].slot_type[j] != STACK_INVALID) valid = true; - types_buf[j] = slot_type_char[ - state->stack[i].slot_type[j]]; + types_buf[j] = slot_type_char[state->stack[i].slot_type[j]]; } types_buf[BPF_REG_SIZE] = 0; if (!valid) continue; if (!print_all && !stack_slot_scratched(env, i)) continue; - verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); - print_liveness(env, state->stack[i].spilled_ptr.live); - if (is_spilled_reg(&state->stack[i])) { + switch (state->stack[i].slot_type[BPF_REG_SIZE - 1]) { + case STACK_SPILL: reg = &state->stack[i].spilled_ptr; t = reg->type; + + verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); + print_liveness(env, reg->live); verbose(env, "=%s", t == SCALAR_VALUE ? 
"" : reg_type_str(env, t)); if (t == SCALAR_VALUE && reg->precise) verbose(env, "P"); if (t == SCALAR_VALUE && tnum_is_const(reg->var_off)) verbose(env, "%lld", reg->var_off.value + reg->off); - } else { + break; + case STACK_DYNPTR: + i += BPF_DYNPTR_NR_SLOTS - 1; + reg = &state->stack[i].spilled_ptr; + + verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); + print_liveness(env, reg->live); + verbose(env, "=dynptr_%s", dynptr_type_str(reg->dynptr.type)); + if (reg->ref_obj_id) + verbose(env, "(ref_id=%d)", reg->ref_obj_id); + break; + case STACK_MISC: + case STACK_ZERO: + default: + reg = &state->stack[i].spilled_ptr; + + for (j = 0; j < BPF_REG_SIZE; j++) + types_buf[j] = slot_type_char[state->stack[i].slot_type[j]]; + types_buf[BPF_REG_SIZE] = 0; + + verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); + print_liveness(env, reg->live); verbose(env, "=%s", types_buf); + break; } } if (state->acquired_refs && state->refs[0].id) { @@ -6411,28 +6453,9 @@ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn /* Fold modifiers (in this case, MEM_RDONLY) when checking expected type */ if (!is_dynptr_type_expected(env, reg, arg_type & ~MEM_RDONLY)) { - const char *err_extra = ""; - - switch (arg_type & DYNPTR_TYPE_FLAG_MASK) { - case DYNPTR_TYPE_LOCAL: - err_extra = "local"; - break; - case DYNPTR_TYPE_RINGBUF: - err_extra = "ringbuf"; - break; - case DYNPTR_TYPE_SKB: - err_extra = "skb "; - break; - case DYNPTR_TYPE_XDP: - err_extra = "xdp "; - break; - default: - err_extra = ""; - break; - } verbose(env, "Expected a dynptr of type %s as arg #%d\n", - err_extra, regno); + dynptr_type_str(arg_to_dynptr_type(arg_type)), regno); return -EINVAL; } -- GitLab From 567da5d253cd6b41c6d015adac1af653725bef9d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 2 Mar 2023 15:50:00 -0800 Subject: [PATCH 0126/2078] bpf: improve regsafe() checks for PTR_TO_{MEM,BUF,TP_BUFFER} Teach regsafe() logic to handle PTR_TO_MEM, PTR_TO_BUF, and PTR_TO_TP_BUFFER similarly to PTR_TO_MAP_{KEY,VALUE}. That is, instead of exact match for var_off and range, use tnum_in() and range_within() checks, allowing more general verified state to subsume more specific current state. This allows to match wider range of valid and safe states, speeding up verification and detecting wider range of equivalent states for upcoming open-coded iteration looping logic. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230302235015.2044271-3-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 4f71b6b61ef4e..b071b922848b8 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -14209,13 +14209,17 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, tnum_in(rold->var_off, rcur->var_off); case PTR_TO_MAP_KEY: case PTR_TO_MAP_VALUE: + case PTR_TO_MEM: + case PTR_TO_BUF: + case PTR_TO_TP_BUFFER: /* If the new min/max/var_off satisfy the old ones and * everything else matches, we are OK. 
*/ return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 && range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off) && - check_ids(rold->id, rcur->id, idmap); + check_ids(rold->id, rcur->id, idmap) && + check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap); case PTR_TO_PACKET_META: case PTR_TO_PACKET: /* We must have at least as much range as the old ptr -- GitLab From 6f876e75d316a75957f3d43c3a8c2a6fe9bc18b2 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 2 Mar 2023 15:50:01 -0800 Subject: [PATCH 0127/2078] selftests/bpf: enhance align selftest's expected log matching Allow to search for expected register state in all the verifier log output that's related to specified instruction number. See added comment for an example of possible situation that is happening due to a simple enhancement done in the next patch, which fixes handling of env->test_state_freq flag in state checkpointing logic. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230302235015.2044271-4-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/align.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c index 4666f88f2bb4f..c94fa8d6c4f6d 100644 --- a/tools/testing/selftests/bpf/prog_tests/align.c +++ b/tools/testing/selftests/bpf/prog_tests/align.c @@ -660,16 +660,22 @@ static int do_test_single(struct bpf_align_test *test) * func#0 @0 * 0: R1=ctx(off=0,imm=0) R10=fp0 * 0: (b7) r3 = 2 ; R3_w=2 + * + * Sometimes it's actually two lines below, e.g. when + * searching for "6: R3_w=scalar(umax=255,var_off=(0x0; 0xff))": + * from 4 to 6: R0_w=pkt(off=8,r=8,imm=0) R1=ctx(off=0,imm=0) R2_w=pkt(off=0,r=8,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0 + * 6: R0_w=pkt(off=8,r=8,imm=0) R1=ctx(off=0,imm=0) R2_w=pkt(off=0,r=8,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0 + * 6: (71) r3 = *(u8 *)(r2 +0) ; R2_w=pkt(off=0,r=8,imm=0) R3_w=scalar(umax=255,var_off=(0x0; 0xff)) */ - if (!strstr(line_ptr, m.match)) { + while (!strstr(line_ptr, m.match)) { cur_line = -1; line_ptr = strtok(NULL, "\n"); - sscanf(line_ptr, "%u: ", &cur_line); + sscanf(line_ptr ?: "", "%u: ", &cur_line); + if (!line_ptr || cur_line != m.line) + break; } - if (cur_line != m.line || !line_ptr || - !strstr(line_ptr, m.match)) { - printf("Failed to find match %u: %s\n", - m.line, m.match); + if (cur_line != m.line || !line_ptr || !strstr(line_ptr, m.match)) { + printf("Failed to find match %u: %s\n", m.line, m.match); ret = 1; printf("%s", bpf_vlog); break; -- GitLab From 98ddcf389d1bb7a407d49c23dfe6443680812f24 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 2 Mar 2023 15:50:02 -0800 Subject: [PATCH 0128/2078] bpf: honor env->test_state_freq flag in is_state_visited() env->test_state_freq flag can be set by user by passing BPF_F_TEST_STATE_FREQ program flag. This is used in a bunch of selftests to have predictable state checkpoints at every jump and so on. Currently, bounded loop handling heuristic ignores this flag if number of processed jumps and/or number of processed instructions is below some thresholds, which throws off that reliable state checkpointing. Honor this flag in all circumstances by disabling heuristic if env->test_state_freq is set. 
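As a rough illustration (not part of this patch) of how a test opts into this predictable checkpointing, the flag is simply OR-ed into the program's load flags before loading; "prog" below is a placeholder for an already opened libbpf program:

  /* sketch: ask the verifier to checkpoint state at every jump */
  #include <linux/bpf.h>
  #include <bpf/libbpf.h>

  static int enable_state_freq(struct bpf_program *prog)
  {
          __u32 flags = bpf_program__flags(prog);

          return bpf_program__set_flags(prog, flags | BPF_F_TEST_STATE_FREQ);
  }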
Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230302235015.2044271-5-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b071b922848b8..fa93ba10762d4 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -14651,7 +14651,8 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) * This threshold shouldn't be too high either, since states * at the end of the loop are likely to be useful in pruning. */ - if (env->jmps_processed - env->prev_jmps_processed < 20 && + if (!env->test_state_freq && + env->jmps_processed - env->prev_jmps_processed < 20 && env->insn_processed - env->prev_insn_processed < 100) add_new_state = false; goto miss; -- GitLab From fffc893b6bf29162aca76842238868b131fcb477 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 2 Mar 2023 15:50:03 -0800 Subject: [PATCH 0129/2078] selftests/bpf: adjust log_fixup's buffer size for proper truncation Adjust log_fixup's expected buffer length to fix the test. It's pretty finicky in its length expectation, but it doesn't break often. So just adjust the length to work on current kernel and with follow up iterator changes as well. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230302235015.2044271-6-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/log_fixup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/log_fixup.c b/tools/testing/selftests/bpf/prog_tests/log_fixup.c index f4ffdcabf4e48..239e1c5753b09 100644 --- a/tools/testing/selftests/bpf/prog_tests/log_fixup.c +++ b/tools/testing/selftests/bpf/prog_tests/log_fixup.c @@ -141,7 +141,7 @@ void test_log_fixup(void) if (test__start_subtest("bad_core_relo_trunc_partial")) bad_core_relo(300, TRUNC_PARTIAL /* truncate original log a bit */); if (test__start_subtest("bad_core_relo_trunc_full")) - bad_core_relo(250, TRUNC_FULL /* truncate also libbpf's message patch */); + bad_core_relo(210, TRUNC_FULL /* truncate also libbpf's message patch */); if (test__start_subtest("bad_core_relo_subprog")) bad_core_relo_subprog(); if (test__start_subtest("missing_map")) -- GitLab From 653ae3a874aca6764a4c1f5a8bf1b072ade0d6f4 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 2 Mar 2023 15:50:04 -0800 Subject: [PATCH 0130/2078] bpf: clean up visit_insn()'s instruction processing Instead of referencing processed instruction repeatedly as insns[t] throughout entire visit_insn() function, take a local insn pointer and work with it in a cleaner way. It makes enhancing this function further a bit easier as well. 
Signed-off-by: Andrii Nakryiko
Link: https://lore.kernel.org/r/20230302235015.2044271-7-andrii@kernel.org
Signed-off-by: Alexei Starovoitov
---
 kernel/bpf/verifier.c | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index fa93ba10762d4..6188d5604ed47 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -13484,44 +13484,43 @@ static int visit_func_call_insn(int t, struct bpf_insn *insns,
 */
 static int visit_insn(int t, struct bpf_verifier_env *env)
 {
-	struct bpf_insn *insns = env->prog->insnsi;
+	struct bpf_insn *insns = env->prog->insnsi, *insn = &insns[t];
 	int ret;

-	if (bpf_pseudo_func(insns + t))
+	if (bpf_pseudo_func(insn))
 		return visit_func_call_insn(t, insns, env, true);

 	/* All non-branch instructions have a single fall-through edge. */
-	if (BPF_CLASS(insns[t].code) != BPF_JMP &&
-	    BPF_CLASS(insns[t].code) != BPF_JMP32)
+	if (BPF_CLASS(insn->code) != BPF_JMP &&
+	    BPF_CLASS(insn->code) != BPF_JMP32)
 		return push_insn(t, t + 1, FALLTHROUGH, env, false);

-	switch (BPF_OP(insns[t].code)) {
+	switch (BPF_OP(insn->code)) {
 	case BPF_EXIT:
 		return DONE_EXPLORING;

 	case BPF_CALL:
-		if (insns[t].imm == BPF_FUNC_timer_set_callback)
+		if (insn->imm == BPF_FUNC_timer_set_callback)
 			/* Mark this call insn as a prune point to trigger
 			 * is_state_visited() check before call itself is
 			 * processed by __check_func_call(). Otherwise new
 			 * async state will be pushed for further exploration.
 			 */
 			mark_prune_point(env, t);
-		return visit_func_call_insn(t, insns, env,
-					    insns[t].src_reg == BPF_PSEUDO_CALL);
+		return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL);

 	case BPF_JA:
-		if (BPF_SRC(insns[t].code) != BPF_K)
+		if (BPF_SRC(insn->code) != BPF_K)
 			return -EINVAL;

 		/* unconditional jump with single edge */
-		ret = push_insn(t, t + insns[t].off + 1, FALLTHROUGH, env,
+		ret = push_insn(t, t + insn->off + 1, FALLTHROUGH, env,
 				true);
 		if (ret)
 			return ret;

-		mark_prune_point(env, t + insns[t].off + 1);
-		mark_jmp_point(env, t + insns[t].off + 1);
+		mark_prune_point(env, t + insn->off + 1);
+		mark_jmp_point(env, t + insn->off + 1);

 		return ret;

@@ -13533,7 +13532,7 @@ static int visit_insn(int t, struct bpf_verifier_env *env)
 		if (ret)
 			return ret;

-		return push_insn(t, t + insns[t].off + 1, BRANCH, env, true);
+		return push_insn(t, t + insn->off + 1, BRANCH, env, true);
 	}
 }
--
GitLab

From c1ee85a9806a720aa054f68fe7f9c79418f36c2b Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko
Date: Thu, 2 Mar 2023 15:50:05 -0800
Subject: [PATCH 0131/2078] bpf: fix visit_insn()'s detection of BPF_FUNC_timer_set_callback helper

It's not correct to assume that any BPF_CALL instruction is a helper call. Fix visit_insn()'s detection of the bpf_timer_set_callback() helper by also checking insn->src_reg == 0. For kfuncs insn->src_reg would be set to BPF_PSEUDO_KFUNC_CALL, and for subprog calls it will be BPF_PSEUDO_CALL.
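For reference, a sketch (not code from this patch) of how the three kinds of BPF_CALL instructions can be told apart purely by src_reg, using only UAPI constants from <linux/bpf.h>:

  #include <linux/bpf.h>

  /* sketch: classify a BPF_CALL instruction by its src_reg field */
  static const char *call_kind(const struct bpf_insn *insn)
  {
          if (insn->src_reg == BPF_PSEUDO_CALL)
                  return "subprog call";
          if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
                  return "kfunc call";
          return "helper call";   /* src_reg == 0, imm holds the helper id */
  }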
Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230302235015.2044271-8-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6188d5604ed47..34fd808e9692c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -13500,7 +13500,7 @@ static int visit_insn(int t, struct bpf_verifier_env *env) return DONE_EXPLORING; case BPF_CALL: - if (insn->imm == BPF_FUNC_timer_set_callback) + if (insn->src_reg == 0 && insn->imm == BPF_FUNC_timer_set_callback) /* Mark this call insn as a prune point to trigger * is_state_visited() check before call itself is * processed by __check_func_call(). Otherwise new -- GitLab From 553a64a85c5d1dac277325a0f51a31c056593048 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 2 Mar 2023 15:50:06 -0800 Subject: [PATCH 0132/2078] bpf: ensure that r0 is marked scratched after any function call r0 is important (unless called function is void-returning, but that's taken care of by print_verifier_state() anyways) in verifier logs. Currently for helpers we seem to print it in verifier log, but for kfuncs we don't. Instead of figuring out where in the maze of code we accidentally set r0 as scratched for helpers and why we don't do that for kfuncs, just enforce that after any function call r0 is marked as scratched. Also, perhaps, we should reconsider "scratched" terminology, as it's mightily confusing. "Touched" would seem more appropriate. But I left that for follow ups for now. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230302235015.2044271-9-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 34fd808e9692c..db0c37e9bc3aa 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -15110,6 +15110,8 @@ static int do_check(struct bpf_verifier_env *env) err = check_helper_call(env, insn, &env->insn_idx); if (err) return err; + + mark_reg_scratched(env, BPF_REG_0); } else if (opcode == BPF_JA) { if (BPF_SRC(insn->code) != BPF_K || insn->imm != 0 || -- GitLab From d0e1ac227945c6af616c003365c6feb986dc0839 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 2 Mar 2023 15:50:07 -0800 Subject: [PATCH 0133/2078] bpf: move kfunc_call_arg_meta higher in the file Move struct bpf_kfunc_call_arg_meta higher in the file and put it next to struct bpf_call_arg_meta, so it can be used from more functions. 
Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230302235015.2044271-10-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 70 +++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index db0c37e9bc3aa..ed9a96ad7b5be 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -270,6 +270,41 @@ struct bpf_call_arg_meta { struct btf_field *kptr_field; }; +struct bpf_kfunc_call_arg_meta { + /* In parameters */ + struct btf *btf; + u32 func_id; + u32 kfunc_flags; + const struct btf_type *func_proto; + const char *func_name; + /* Out parameters */ + u32 ref_obj_id; + u8 release_regno; + bool r0_rdonly; + u32 ret_btf_id; + u64 r0_size; + u32 subprogno; + struct { + u64 value; + bool found; + } arg_constant; + struct { + struct btf *btf; + u32 btf_id; + } arg_obj_drop; + struct { + struct btf_field *field; + } arg_list_head; + struct { + struct btf_field *field; + } arg_rbtree_root; + struct { + enum bpf_dynptr_type type; + u32 id; + } initialized_dynptr; + u64 mem_size; +}; + struct btf *btf_vmlinux; static DEFINE_MUTEX(bpf_verifier_lock); @@ -8811,41 +8846,6 @@ static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno, } } -struct bpf_kfunc_call_arg_meta { - /* In parameters */ - struct btf *btf; - u32 func_id; - u32 kfunc_flags; - const struct btf_type *func_proto; - const char *func_name; - /* Out parameters */ - u32 ref_obj_id; - u8 release_regno; - bool r0_rdonly; - u32 ret_btf_id; - u64 r0_size; - u32 subprogno; - struct { - u64 value; - bool found; - } arg_constant; - struct { - struct btf *btf; - u32 btf_id; - } arg_obj_drop; - struct { - struct btf_field *field; - } arg_list_head; - struct { - struct btf_field *field; - } arg_rbtree_root; - struct { - enum bpf_dynptr_type type; - u32 id; - } initialized_dynptr; - u64 mem_size; -}; - static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta) { return meta->kfunc_flags & KF_ACQUIRE; -- GitLab From d5271c5b1950b887def1663b75e2d710cc16535f Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 2 Mar 2023 15:50:08 -0800 Subject: [PATCH 0134/2078] bpf: mark PTR_TO_MEM as non-null register type PTR_TO_MEM register without PTR_MAYBE_NULL is indeed non-null. This is important for BPF verifier to be able to prune guaranteed not to be taken branches. This is always the case with open-coded iterators. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230302235015.2044271-11-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ed9a96ad7b5be..d95975cbcc190 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -487,7 +487,8 @@ static bool reg_type_not_null(enum bpf_reg_type type) type == PTR_TO_TCP_SOCK || type == PTR_TO_MAP_VALUE || type == PTR_TO_MAP_KEY || - type == PTR_TO_SOCK_COMMON; + type == PTR_TO_SOCK_COMMON || + type == PTR_TO_MEM; } static bool type_is_ptr_alloc_obj(u32 type) -- GitLab From a461f5adf17756e99ee0903d1a40961b0342ebb3 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 2 Mar 2023 15:50:09 -0800 Subject: [PATCH 0135/2078] bpf: generalize dynptr_get_spi to be usable for iters Generalize the logic of fetching special stack slot object state using spi (stack slot index). This will be used by STACK_ITER logic next. 
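As a brief aside on terminology (an illustration, not part of the patch): a stack slot index (spi) names one 8-byte slot below the frame pointer, so the mapping from a frame-pointer offset to an spi is simply:

  /* sketch: fp-8 -> spi 0, fp-16 -> spi 1, ...; BPF_REG_SIZE is 8 */
  static int example_get_spi(int off)     /* off is negative, e.g. -16 */
  {
          return (-off - 1) / 8;
  }

A multi-slot object such as a dynptr then occupies nr_slots consecutive slots, which the patch validates against the allocated stack via is_spi_bounds_valid().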
Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230302235015.2044271-12-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d95975cbcc190..c4151c9efe245 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -710,32 +710,38 @@ static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_sl return spi - nr_slots + 1 >= 0 && spi < allocated_slots; } -static int dynptr_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +static int stack_slot_obj_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg, + const char *obj_kind, int nr_slots) { int off, spi; if (!tnum_is_const(reg->var_off)) { - verbose(env, "dynptr has to be at a constant offset\n"); + verbose(env, "%s has to be at a constant offset\n", obj_kind); return -EINVAL; } off = reg->off + reg->var_off.value; if (off % BPF_REG_SIZE) { - verbose(env, "cannot pass in dynptr at an offset=%d\n", off); + verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off); return -EINVAL; } spi = __get_spi(off); - if (spi < 1) { - verbose(env, "cannot pass in dynptr at an offset=%d\n", off); + if (spi + 1 < nr_slots) { + verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off); return -EINVAL; } - if (!is_spi_bounds_valid(func(env, reg), spi, BPF_DYNPTR_NR_SLOTS)) + if (!is_spi_bounds_valid(func(env, reg), spi, nr_slots)) return -ERANGE; return spi; } +static int dynptr_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + return stack_slot_obj_get_spi(env, reg, "dynptr", BPF_DYNPTR_NR_SLOTS); +} + static const char *kernel_type_name(const struct btf* btf, u32 id) { return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off); -- GitLab From f4b4eee6169bb33c5157ebe07e53d7e4be7631c0 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 2 Mar 2023 15:50:10 -0800 Subject: [PATCH 0136/2078] bpf: add support for fixed-size memory pointer returns for kfuncs Support direct fixed-size (and for now, read-only) memory access when kfunc's return type is a pointer to non-struct type. Calculate type size and let BPF program access that many bytes directly. This is crucial for numbers iterator. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230302235015.2044271-13-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c4151c9efe245..b2116ca78d9a3 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -10336,6 +10336,14 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, return -EFAULT; } } else if (!__btf_type_is_struct(ptr_type)) { + if (!meta.r0_size) { + __u32 sz; + + if (!IS_ERR(btf_resolve_size(desc_btf, ptr_type, &sz))) { + meta.r0_size = sz; + meta.r0_rdonly = true; + } + } if (!meta.r0_size) { ptr_type_name = btf_name_by_offset(desc_btf, ptr_type->name_off); -- GitLab From 5c48f9432d06bf85ca934d7c4ecb14a7ec0c7f5d Mon Sep 17 00:00:00 2001 From: Chih-Kang Chang Date: Fri, 24 Feb 2023 16:21:17 +0800 Subject: [PATCH 0137/2078] wifi: rtw89: fix SER L1 might stop entering LPS issue When SER L1 triggered, driver need to stop Rx and clear RTW89_FLAG_RUNNING flag. If track_work check RTW89_FLAG_RUNNING simultaneously, it will check failed and never queue track_work again, and LPS won't enter either. 
Therefore, we cancel delayed work when enter SER L1, and queue delayed work when leave SER L1. Signed-off-by: Chih-Kang Chang Signed-off-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230224082117.21241-1-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/ser.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw89/ser.c b/drivers/net/wireless/realtek/rtw89/ser.c index 61db7189fdab8..9e9f6947e7f14 100644 --- a/drivers/net/wireless/realtek/rtw89/ser.c +++ b/drivers/net/wireless/realtek/rtw89/ser.c @@ -414,8 +414,11 @@ static void ser_idle_st_hdl(struct rtw89_ser *ser, u8 evt) static void ser_reset_trx_st_hdl(struct rtw89_ser *ser, u8 evt) { + struct rtw89_dev *rtwdev = container_of(ser, struct rtw89_dev, ser); + switch (evt) { case SER_EV_STATE_IN: + cancel_delayed_work_sync(&rtwdev->track_work); drv_stop_tx(ser); if (hal_stop_dma(ser)) { @@ -446,6 +449,8 @@ static void ser_reset_trx_st_hdl(struct rtw89_ser *ser, u8 evt) hal_enable_dma(ser); drv_resume_rx(ser); drv_resume_tx(ser); + ieee80211_queue_delayed_work(rtwdev->hw, &rtwdev->track_work, + RTW89_TRACK_WORK_PERIOD); break; default: -- GitLab From e5c3da9abd44bee298070d4e66b502067a8f1715 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Fri, 24 Feb 2023 13:59:17 +0100 Subject: [PATCH 0138/2078] wifi: brcmfmac: pcie: Add 4359C0 firmware definition Some phones from around 2016, as well as other random devices have this chip called 43956 or 4359C0 or 43596A0, which is more or less just a rev bump (v9) of the already-supported 4359. Add a corresponding firmware definition to allow for choosing the correct blob. Suggested-by: Arend van Spriel Signed-off-by: Konrad Dybcio Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230224-topic-brcm_tone-v1-1-333b0ac67934@linaro.org --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c index 848f05ec1268c..59f3e9c5e1390 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c @@ -58,6 +58,7 @@ BRCMF_FW_CLM_DEF(4356, "brcmfmac4356-pcie"); BRCMF_FW_CLM_DEF(43570, "brcmfmac43570-pcie"); BRCMF_FW_DEF(4358, "brcmfmac4358-pcie"); BRCMF_FW_DEF(4359, "brcmfmac4359-pcie"); +BRCMF_FW_DEF(4359C, "brcmfmac4359c-pcie"); BRCMF_FW_CLM_DEF(4364B2, "brcmfmac4364b2-pcie"); BRCMF_FW_CLM_DEF(4364B3, "brcmfmac4364b3-pcie"); BRCMF_FW_DEF(4365B, "brcmfmac4365b-pcie"); @@ -92,7 +93,8 @@ static const struct brcmf_firmware_mapping brcmf_pcie_fwnames[] = { BRCMF_FW_ENTRY(BRCM_CC_43569_CHIP_ID, 0xFFFFFFFF, 43570), BRCMF_FW_ENTRY(BRCM_CC_43570_CHIP_ID, 0xFFFFFFFF, 43570), BRCMF_FW_ENTRY(BRCM_CC_4358_CHIP_ID, 0xFFFFFFFF, 4358), - BRCMF_FW_ENTRY(BRCM_CC_4359_CHIP_ID, 0xFFFFFFFF, 4359), + BRCMF_FW_ENTRY(BRCM_CC_4359_CHIP_ID, 0x000001FF, 4359), + BRCMF_FW_ENTRY(BRCM_CC_4359_CHIP_ID, 0xFFFFFE00, 4359C), BRCMF_FW_ENTRY(BRCM_CC_4364_CHIP_ID, 0x0000000F, 4364B2), /* 3 */ BRCMF_FW_ENTRY(BRCM_CC_4364_CHIP_ID, 0xFFFFFFF0, 4364B3), /* 4 */ BRCMF_FW_ENTRY(BRCM_CC_4365_CHIP_ID, 0x0000000F, 4365B), -- GitLab From b7ed9fa2cb76ca7a3c3cd4a6d35748fe1fbda9f6 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 26 Feb 2023 23:10:03 +0100 Subject: [PATCH 0139/2078] wifi: rtw88: mac: Return the original error from rtw_pwr_seq_parser() rtw_pwr_seq_parser() calls rtw_sub_pwr_seq_parser() which can either 
return -EBUSY, -EINVAL or 0. Propagate the original error code instead of unconditionally returning -EBUSY in case of an error. Fixes: e3037485c68e ("rtw88: new Realtek 802.11ac driver") Signed-off-by: Martin Blumenstingl Reviewed-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230226221004.138331-2-martin.blumenstingl@googlemail.com --- drivers/net/wireless/realtek/rtw88/mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtw88/mac.c b/drivers/net/wireless/realtek/rtw88/mac.c index 1c9530a0eb693..4749d75fefee2 100644 --- a/drivers/net/wireless/realtek/rtw88/mac.c +++ b/drivers/net/wireless/realtek/rtw88/mac.c @@ -236,7 +236,7 @@ static int rtw_pwr_seq_parser(struct rtw_dev *rtwdev, ret = rtw_sub_pwr_seq_parser(rtwdev, intf_mask, cut_mask, cmd); if (ret) - return -EBUSY; + return ret; idx++; } while (1); -- GitLab From 15c8e267dfa62f207ee1db666c822324e3362b84 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 26 Feb 2023 23:10:04 +0100 Subject: [PATCH 0140/2078] wifi: rtw88: mac: Return the original error from rtw_mac_power_switch() rtw_mac_power_switch() calls rtw_pwr_seq_parser() which can return -EINVAL, -EBUSY or 0. Propagate the original error code instead of unconditionally returning -EINVAL in case of an error. Fixes: e3037485c68e ("rtw88: new Realtek 802.11ac driver") Signed-off-by: Martin Blumenstingl Reviewed-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230226221004.138331-3-martin.blumenstingl@googlemail.com --- drivers/net/wireless/realtek/rtw88/mac.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw88/mac.c b/drivers/net/wireless/realtek/rtw88/mac.c index 4749d75fefee2..f3a566cf979b5 100644 --- a/drivers/net/wireless/realtek/rtw88/mac.c +++ b/drivers/net/wireless/realtek/rtw88/mac.c @@ -250,6 +250,7 @@ static int rtw_mac_power_switch(struct rtw_dev *rtwdev, bool pwr_on) const struct rtw_pwr_seq_cmd **pwr_seq; u8 rpwm; bool cur_pwr; + int ret; if (rtw_chip_wcpu_11ac(rtwdev)) { rpwm = rtw_read8(rtwdev, rtwdev->hci.rpwm_addr); @@ -273,8 +274,9 @@ static int rtw_mac_power_switch(struct rtw_dev *rtwdev, bool pwr_on) return -EALREADY; pwr_seq = pwr_on ? chip->pwr_on_seq : chip->pwr_off_seq; - if (rtw_pwr_seq_parser(rtwdev, pwr_seq)) - return -EINVAL; + ret = rtw_pwr_seq_parser(rtwdev, pwr_seq); + if (ret) + return ret; if (pwr_on) set_bit(RTW_FLAG_POWERON, rtwdev->flags); -- GitLab From ff6f38eb920bd2b86e1d3157a01f409e2dce1320 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Thu, 2 Mar 2023 10:39:11 +0800 Subject: [PATCH 0141/2078] wifi: rtlwifi: rtl8192se: Remove some unused variables Variables bcntime_cfg, bcn_cw and bcn_ifs are not effectively used, so delete them. drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c:1555:6: warning: variable 'bcntime_cfg' set but not used. 
Reported-by: Abaci Robot
Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=4240
Signed-off-by: Jiapeng Chong
Acked-by: Ping-Ke Shih
Signed-off-by: Kalle Valo
Link: https://lore.kernel.org/r/20230302023911.59278-1-jiapeng.chong@linux.alibaba.com
---
 drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c
index bd0b7e365edb0..a8b5bf45b1bba 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c
@@ -1552,8 +1552,6 @@ void rtl92se_set_beacon_related_registers(struct ieee80211_hw *hw)
 {
 	struct rtl_priv *rtlpriv = rtl_priv(hw);
 	struct rtl_mac *mac = rtl_mac(rtl_priv(hw));
-	u16 bcntime_cfg = 0;
-	u16 bcn_cw = 6, bcn_ifs = 0xf;
 	u16 atim_window = 2;

 	/* ATIM Window (in unit of TU). */
@@ -1576,13 +1574,6 @@ void rtl92se_set_beacon_related_registers(struct ieee80211_hw *hw)
 	 * other ad hoc STA */
 	rtl_write_byte(rtlpriv, BCN_ERR_THRESH, 100);

-	/* Beacon Time Configuration */
-	if (mac->opmode == NL80211_IFTYPE_ADHOC)
-		bcntime_cfg |= (bcn_cw << BCN_TCFG_CW_SHIFT);
-
-	/* TODO: bcn_ifs may required to be changed on ASIC */
-	bcntime_cfg |= bcn_ifs << BCN_TCFG_IFS;
-
 	/*for beacon changed */
 	rtl92s_phy_set_beacon_hwreg(hw, mac->beacon_interval);
 }
--
GitLab

From fd4cb29f2a3d54ec7b4e012300321601af10bd68 Mon Sep 17 00:00:00 2001
From: Rong Tao
Date: Sat, 4 Mar 2023 23:17:04 +0800
Subject: [PATCH 0142/2078] tools/resolve_btfids: Add /libsubcmd to .gitignore

Add libsubcmd to .gitignore, otherwise after compiling the kernel it would result in the following:

  # bpf-next...bpf-next/master
  ?? tools/bpf/resolve_btfids/libsubcmd/

Signed-off-by: Rong Tao
Signed-off-by: Daniel Borkmann
Acked-by: Jiri Olsa
Link: https://lore.kernel.org/bpf/tencent_F13D670D5D7AA9C4BD868D3220921AAC090A@qq.com
---
 tools/bpf/resolve_btfids/.gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/bpf/resolve_btfids/.gitignore b/tools/bpf/resolve_btfids/.gitignore
index 16913fffc9859..52d5e9721d921 100644
--- a/tools/bpf/resolve_btfids/.gitignore
+++ b/tools/bpf/resolve_btfids/.gitignore
@@ -1,3 +1,4 @@
 /fixdep
 /resolve_btfids
 /libbpf/
+/libsubcmd/
--
GitLab

From f8b299bc6a0f8695d4005ecff29773c79a1d29af Mon Sep 17 00:00:00 2001
From: Menglong Dong
Date: Mon, 6 Mar 2023 14:48:31 +0800
Subject: [PATCH 0143/2078] libbpf: Add support to set kprobe/uprobe attach mode

By default, libbpf will attach the kprobe/uprobe BPF program in the latest mode supported by the kernel. In this patch, we add the support to let users manually attach kprobe/uprobe in legacy or perf mode.

There are 3 modes supported by the kernel to attach kprobe/uprobe:

  LEGACY: create perf event in legacy way and don't use bpf_link
  PERF: create perf event with perf_event_open() and don't use bpf_link
  LINK: create perf event with perf_event_open() and use bpf_link

Users now can manually choose the mode with bpf_program__attach_uprobe_opts()/bpf_program__attach_kprobe_opts().

Signed-off-by: Menglong Dong
Signed-off-by: Andrii Nakryiko
Reviewed-by: Biao Jiang
Link: https://lore.kernel.org/bpf/20230113093427.1666466-1-imagedong@tencent.com/
Link: https://lore.kernel.org/bpf/20230306064833.7932-2-imagedong@tencent.com
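A minimal usage sketch (illustration only, not part of this patch; "prog" stands for an already loaded kprobe program and the traced symbol is arbitrary): forcing the legacy tracefs-based attach looks like the snippet below, while requesting PROBE_ATTACH_MODE_PERF or PROBE_ATTACH_MODE_LINK on a kernel without the corresponding support makes the attach fail with -ENOTSUP:

  LIBBPF_OPTS(bpf_kprobe_opts, kopts,
          .attach_mode = PROBE_ATTACH_MODE_LEGACY,
  );
  struct bpf_link *link;

  link = bpf_program__attach_kprobe_opts(prog, "do_sys_openat2", &kopts);
  if (!link)
          /* errno holds the error, e.g. ENOTSUP */
          return -errno;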
--- tools/lib/bpf/libbpf.c | 48 +++++++++++++++++++++++++++++++++++++++- tools/lib/bpf/libbpf.h | 50 +++++++++++++++++++++++++++++++----------- 2 files changed, 84 insertions(+), 14 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index dacaae31b76a5..a557718401e4c 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -9724,6 +9724,7 @@ struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *p char errmsg[STRERR_BUFSIZE]; struct bpf_link_perf *link; int prog_fd, link_fd = -1, err; + bool force_ioctl_attach; if (!OPTS_VALID(opts, bpf_perf_event_opts)) return libbpf_err_ptr(-EINVAL); @@ -9747,7 +9748,8 @@ struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *p link->link.dealloc = &bpf_link_perf_dealloc; link->perf_event_fd = pfd; - if (kernel_supports(prog->obj, FEAT_PERF_LINK)) { + force_ioctl_attach = OPTS_GET(opts, force_ioctl_attach, false); + if (kernel_supports(prog->obj, FEAT_PERF_LINK) && !force_ioctl_attach) { DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts, .perf_event.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0)); @@ -10106,6 +10108,7 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, const struct bpf_kprobe_opts *opts) { DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); + enum probe_attach_mode attach_mode; char errmsg[STRERR_BUFSIZE]; char *legacy_probe = NULL; struct bpf_link *link; @@ -10116,11 +10119,32 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, if (!OPTS_VALID(opts, bpf_kprobe_opts)) return libbpf_err_ptr(-EINVAL); + attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT); retprobe = OPTS_GET(opts, retprobe, false); offset = OPTS_GET(opts, offset, 0); pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); legacy = determine_kprobe_perf_type() < 0; + switch (attach_mode) { + case PROBE_ATTACH_MODE_LEGACY: + legacy = true; + pe_opts.force_ioctl_attach = true; + break; + case PROBE_ATTACH_MODE_PERF: + if (legacy) + return libbpf_err_ptr(-ENOTSUP); + pe_opts.force_ioctl_attach = true; + break; + case PROBE_ATTACH_MODE_LINK: + if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK)) + return libbpf_err_ptr(-ENOTSUP); + break; + case PROBE_ATTACH_MODE_DEFAULT: + break; + default: + return libbpf_err_ptr(-EINVAL); + } + if (!legacy) { pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name, offset, @@ -10852,6 +10876,7 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, const char *archive_path = NULL, *archive_sep = NULL; char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL; DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); + enum probe_attach_mode attach_mode; char full_path[PATH_MAX]; struct bpf_link *link; size_t ref_ctr_off; @@ -10862,6 +10887,7 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, if (!OPTS_VALID(opts, bpf_uprobe_opts)) return libbpf_err_ptr(-EINVAL); + attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT); retprobe = OPTS_GET(opts, retprobe, false); ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0); pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); @@ -10903,6 +10929,26 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, } legacy = determine_uprobe_perf_type() < 0; + switch (attach_mode) { + case PROBE_ATTACH_MODE_LEGACY: + legacy = true; + pe_opts.force_ioctl_attach = true; + break; + case PROBE_ATTACH_MODE_PERF: + if (legacy) + return libbpf_err_ptr(-ENOTSUP); + pe_opts.force_ioctl_attach = true; + break; + case 
PROBE_ATTACH_MODE_LINK: + if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK)) + return libbpf_err_ptr(-ENOTSUP); + break; + case PROBE_ATTACH_MODE_DEFAULT: + break; + default: + return libbpf_err_ptr(-EINVAL); + } + if (!legacy) { pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path, func_offset, pid, ref_ctr_off); diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 2efd80f6f7b92..db4992a036f8b 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -447,12 +447,15 @@ LIBBPF_API struct bpf_link * bpf_program__attach(const struct bpf_program *prog); struct bpf_perf_event_opts { - /* size of this struct, for forward/backward compatiblity */ + /* size of this struct, for forward/backward compatibility */ size_t sz; /* custom user-provided value fetchable through bpf_get_attach_cookie() */ __u64 bpf_cookie; + /* don't use BPF link when attach BPF program */ + bool force_ioctl_attach; + size_t :0; }; -#define bpf_perf_event_opts__last_field bpf_cookie +#define bpf_perf_event_opts__last_field force_ioctl_attach LIBBPF_API struct bpf_link * bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd); @@ -461,8 +464,25 @@ LIBBPF_API struct bpf_link * bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd, const struct bpf_perf_event_opts *opts); +/** + * enum probe_attach_mode - the mode to attach kprobe/uprobe + * + * force libbpf to attach kprobe/uprobe in specific mode, -ENOTSUP will + * be returned if it is not supported by the kernel. + */ +enum probe_attach_mode { + /* attach probe in latest supported mode by kernel */ + PROBE_ATTACH_MODE_DEFAULT = 0, + /* attach probe in legacy mode, using debugfs/tracefs */ + PROBE_ATTACH_MODE_LEGACY, + /* create perf event with perf_event_open() syscall */ + PROBE_ATTACH_MODE_PERF, + /* attach probe with BPF link */ + PROBE_ATTACH_MODE_LINK, +}; + struct bpf_kprobe_opts { - /* size of this struct, for forward/backward compatiblity */ + /* size of this struct, for forward/backward compatibility */ size_t sz; /* custom user-provided value fetchable through bpf_get_attach_cookie() */ __u64 bpf_cookie; @@ -470,9 +490,11 @@ struct bpf_kprobe_opts { size_t offset; /* kprobe is return probe */ bool retprobe; + /* kprobe attach mode */ + enum probe_attach_mode attach_mode; size_t :0; }; -#define bpf_kprobe_opts__last_field retprobe +#define bpf_kprobe_opts__last_field attach_mode LIBBPF_API struct bpf_link * bpf_program__attach_kprobe(const struct bpf_program *prog, bool retprobe, @@ -506,7 +528,7 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, const struct bpf_kprobe_multi_opts *opts); struct bpf_ksyscall_opts { - /* size of this struct, for forward/backward compatiblity */ + /* size of this struct, for forward/backward compatibility */ size_t sz; /* custom user-provided value fetchable through bpf_get_attach_cookie() */ __u64 bpf_cookie; @@ -552,7 +574,7 @@ bpf_program__attach_ksyscall(const struct bpf_program *prog, const struct bpf_ksyscall_opts *opts); struct bpf_uprobe_opts { - /* size of this struct, for forward/backward compatiblity */ + /* size of this struct, for forward/backward compatibility */ size_t sz; /* offset of kernel reference counted USDT semaphore, added in * a6ca88b241d5 ("trace_uprobe: support reference counter in fd-based uprobe") @@ -570,9 +592,11 @@ struct bpf_uprobe_opts { * binary_path. 
*/ const char *func_name; + /* uprobe attach mode */ + enum probe_attach_mode attach_mode; size_t :0; }; -#define bpf_uprobe_opts__last_field func_name +#define bpf_uprobe_opts__last_field attach_mode /** * @brief **bpf_program__attach_uprobe()** attaches a BPF program @@ -646,7 +670,7 @@ bpf_program__attach_usdt(const struct bpf_program *prog, const struct bpf_usdt_opts *opts); struct bpf_tracepoint_opts { - /* size of this struct, for forward/backward compatiblity */ + /* size of this struct, for forward/backward compatibility */ size_t sz; /* custom user-provided value fetchable through bpf_get_attach_cookie() */ __u64 bpf_cookie; @@ -1110,7 +1134,7 @@ struct user_ring_buffer; typedef int (*ring_buffer_sample_fn)(void *ctx, void *data, size_t size); struct ring_buffer_opts { - size_t sz; /* size of this struct, for forward/backward compatiblity */ + size_t sz; /* size of this struct, for forward/backward compatibility */ }; #define ring_buffer_opts__last_field sz @@ -1475,7 +1499,7 @@ LIBBPF_API void bpf_object__destroy_subskeleton(struct bpf_object_subskeleton *s); struct gen_loader_opts { - size_t sz; /* size of this struct, for forward/backward compatiblity */ + size_t sz; /* size of this struct, for forward/backward compatibility */ const char *data; const char *insns; __u32 data_sz; @@ -1493,13 +1517,13 @@ enum libbpf_tristate { }; struct bpf_linker_opts { - /* size of this struct, for forward/backward compatiblity */ + /* size of this struct, for forward/backward compatibility */ size_t sz; }; #define bpf_linker_opts__last_field sz struct bpf_linker_file_opts { - /* size of this struct, for forward/backward compatiblity */ + /* size of this struct, for forward/backward compatibility */ size_t sz; }; #define bpf_linker_file_opts__last_field sz @@ -1542,7 +1566,7 @@ typedef int (*libbpf_prog_attach_fn_t)(const struct bpf_program *prog, long cook struct bpf_link **link); struct libbpf_prog_handler_opts { - /* size of this struct, for forward/backward compatiblity */ + /* size of this struct, for forward/backward compatibility */ size_t sz; /* User-provided value that is passed to prog_setup_fn, * prog_prepare_load_fn, and prog_attach_fn callbacks. Allows user to -- GitLab From 7391ec6391e2b129aeaee5462487c404f61157aa Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Mon, 6 Mar 2023 14:48:32 +0800 Subject: [PATCH 0144/2078] selftests/bpf: Split test_attach_probe into multi subtests In order to adapt to the older kernel, now we split the "attach_probe" testing into multi subtests: manual // manual attach tests for kprobe/uprobe auto // auto-attach tests for kprobe and uprobe kprobe-sleepable // kprobe sleepable test uprobe-lib // uprobe tests for library function by name uprobe-sleepable // uprobe sleepable test uprobe-ref_ctr // uprobe ref_ctr test As sleepable kprobe needs to set BPF_F_SLEEPABLE flag before loading, we need to move it to a stand alone skel file, in case of it is not supported by kernel and make the whole loading fail. Therefore, we can only enable part of the subtests for older kernel. 
Signed-off-by: Menglong Dong Signed-off-by: Andrii Nakryiko Reviewed-by: Biao Jiang Link: https://lore.kernel.org/bpf/20230306064833.7932-3-imagedong@tencent.com --- .../selftests/bpf/prog_tests/attach_probe.c | 260 ++++++++++++------ .../bpf/progs/test_attach_kprobe_sleepable.c | 23 ++ .../selftests/bpf/progs/test_attach_probe.c | 23 +- 3 files changed, 205 insertions(+), 101 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/test_attach_kprobe_sleepable.c diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index 56374c8b5436d..c374759f39ce0 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include "test_attach_kprobe_sleepable.skel.h" #include "test_attach_probe.skel.h" /* this is how USDT semaphore is actually defined, except volatile modifier */ @@ -23,52 +24,24 @@ static noinline void trigger_func3(void) asm volatile (""); } +/* attach point for ref_ctr */ +static noinline void trigger_func4(void) +{ + asm volatile (""); +} + static char test_data[] = "test_data"; -void test_attach_probe(void) +/* manual attach kprobe/kretprobe/uprobe/uretprobe testings */ +static void test_attach_probe_manual(struct test_attach_probe *skel) { DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); struct bpf_link *kprobe_link, *kretprobe_link; struct bpf_link *uprobe_link, *uretprobe_link; - struct test_attach_probe* skel; - ssize_t uprobe_offset, ref_ctr_offset; - struct bpf_link *uprobe_err_link; - FILE *devnull; - bool legacy; - - /* Check if new-style kprobe/uprobe API is supported. - * Kernels that support new FD-based kprobe and uprobe BPF attachment - * through perf_event_open() syscall expose - * /sys/bus/event_source/devices/kprobe/type and - * /sys/bus/event_source/devices/uprobe/type files, respectively. They - * contain magic numbers that are passed as "type" field of - * perf_event_attr. Lack of such file in the system indicates legacy - * kernel with old-style kprobe/uprobe attach interface through - * creating per-probe event through tracefs. For such cases - * ref_ctr_offset feature is not supported, so we don't test it. 
- */ - legacy = access("/sys/bus/event_source/devices/kprobe/type", F_OK) != 0; + ssize_t uprobe_offset; uprobe_offset = get_uprobe_offset(&trigger_func); if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset")) - return; - - ref_ctr_offset = get_rel_offset((uintptr_t)&uprobe_ref_ctr); - if (!ASSERT_GE(ref_ctr_offset, 0, "ref_ctr_offset")) - return; - - skel = test_attach_probe__open(); - if (!ASSERT_OK_PTR(skel, "skel_open")) - return; - - /* sleepable kprobe test case needs flags set before loading */ - if (!ASSERT_OK(bpf_program__set_flags(skel->progs.handle_kprobe_sleepable, - BPF_F_SLEEPABLE), "kprobe_sleepable_flags")) - goto cleanup; - - if (!ASSERT_OK(test_attach_probe__load(skel), "skel_load")) - goto cleanup; - if (!ASSERT_OK_PTR(skel->bss, "check_bss")) goto cleanup; /* manual-attach kprobe/kretprobe */ @@ -86,18 +59,9 @@ void test_attach_probe(void) goto cleanup; skel->links.handle_kretprobe = kretprobe_link; - /* auto-attachable kprobe and kretprobe */ - skel->links.handle_kprobe_auto = bpf_program__attach(skel->progs.handle_kprobe_auto); - ASSERT_OK_PTR(skel->links.handle_kprobe_auto, "attach_kprobe_auto"); - - skel->links.handle_kretprobe_auto = bpf_program__attach(skel->progs.handle_kretprobe_auto); - ASSERT_OK_PTR(skel->links.handle_kretprobe_auto, "attach_kretprobe_auto"); - - if (!legacy) - ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_before"); - + /* manual-attach uprobe/uretprobe */ + uprobe_opts.ref_ctr_offset = 0; uprobe_opts.retprobe = false; - uprobe_opts.ref_ctr_offset = legacy ? 0 : ref_ctr_offset; uprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe, 0 /* self pid */, "/proc/self/exe", @@ -107,12 +71,7 @@ void test_attach_probe(void) goto cleanup; skel->links.handle_uprobe = uprobe_link; - if (!legacy) - ASSERT_GT(uprobe_ref_ctr, 0, "uprobe_ref_ctr_after"); - - /* if uprobe uses ref_ctr, uretprobe has to use ref_ctr as well */ uprobe_opts.retprobe = true; - uprobe_opts.ref_ctr_offset = legacy ? 
0 : ref_ctr_offset; uretprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe, -1 /* any pid */, "/proc/self/exe", @@ -121,12 +80,7 @@ void test_attach_probe(void) goto cleanup; skel->links.handle_uretprobe = uretprobe_link; - /* verify auto-attach fails for old-style uprobe definition */ - uprobe_err_link = bpf_program__attach(skel->progs.handle_uprobe_byname); - if (!ASSERT_EQ(libbpf_get_error(uprobe_err_link), -EOPNOTSUPP, - "auto-attach should fail for old-style name")) - goto cleanup; - + /* attach uprobe by function name manually */ uprobe_opts.func_name = "trigger_func2"; uprobe_opts.retprobe = false; uprobe_opts.ref_ctr_offset = 0; @@ -138,11 +92,62 @@ void test_attach_probe(void) if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname, "attach_uprobe_byname")) goto cleanup; + /* trigger & validate kprobe && kretprobe */ + usleep(1); + + /* trigger & validate uprobe & uretprobe */ + trigger_func(); + + /* trigger & validate uprobe attached by name */ + trigger_func2(); + + ASSERT_EQ(skel->bss->kprobe_res, 1, "check_kprobe_res"); + ASSERT_EQ(skel->bss->kretprobe_res, 2, "check_kretprobe_res"); + ASSERT_EQ(skel->bss->uprobe_res, 3, "check_uprobe_res"); + ASSERT_EQ(skel->bss->uretprobe_res, 4, "check_uretprobe_res"); + ASSERT_EQ(skel->bss->uprobe_byname_res, 5, "check_uprobe_byname_res"); + +cleanup: +} + +static void test_attach_probe_auto(struct test_attach_probe *skel) +{ + struct bpf_link *uprobe_err_link; + + /* auto-attachable kprobe and kretprobe */ + skel->links.handle_kprobe_auto = bpf_program__attach(skel->progs.handle_kprobe_auto); + ASSERT_OK_PTR(skel->links.handle_kprobe_auto, "attach_kprobe_auto"); + + skel->links.handle_kretprobe_auto = bpf_program__attach(skel->progs.handle_kretprobe_auto); + ASSERT_OK_PTR(skel->links.handle_kretprobe_auto, "attach_kretprobe_auto"); + + /* verify auto-attach fails for old-style uprobe definition */ + uprobe_err_link = bpf_program__attach(skel->progs.handle_uprobe_byname); + if (!ASSERT_EQ(libbpf_get_error(uprobe_err_link), -EOPNOTSUPP, + "auto-attach should fail for old-style name")) + return; + /* verify auto-attach works */ skel->links.handle_uretprobe_byname = bpf_program__attach(skel->progs.handle_uretprobe_byname); if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname, "attach_uretprobe_byname")) - goto cleanup; + return; + + /* trigger & validate kprobe && kretprobe */ + usleep(1); + + /* trigger & validate uprobe attached by name */ + trigger_func2(); + + ASSERT_EQ(skel->bss->kprobe2_res, 11, "check_kprobe_auto_res"); + ASSERT_EQ(skel->bss->kretprobe2_res, 22, "check_kretprobe_auto_res"); + ASSERT_EQ(skel->bss->uretprobe_byname_res, 6, "check_uretprobe_byname_res"); +} + +static void test_uprobe_lib(struct test_attach_probe *skel) +{ + DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); + FILE *devnull; /* test attach by name for a library function, using the library * as the binary argument. libc.so.6 will be resolved via dlopen()/dlinfo(). 
@@ -155,7 +160,7 @@ void test_attach_probe(void) "libc.so.6", 0, &uprobe_opts); if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname2, "attach_uprobe_byname2")) - goto cleanup; + return; uprobe_opts.func_name = "fclose"; uprobe_opts.retprobe = true; @@ -165,62 +170,137 @@ void test_attach_probe(void) "libc.so.6", 0, &uprobe_opts); if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname2, "attach_uretprobe_byname2")) + return; + + /* trigger & validate shared library u[ret]probes attached by name */ + devnull = fopen("/dev/null", "r"); + fclose(devnull); + + ASSERT_EQ(skel->bss->uprobe_byname2_res, 7, "check_uprobe_byname2_res"); + ASSERT_EQ(skel->bss->uretprobe_byname2_res, 8, "check_uretprobe_byname2_res"); +} + +static void test_uprobe_ref_ctr(struct test_attach_probe *skel) +{ + DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); + struct bpf_link *uprobe_link, *uretprobe_link; + ssize_t uprobe_offset, ref_ctr_offset; + + uprobe_offset = get_uprobe_offset(&trigger_func4); + if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset_ref_ctr")) + return; + + ref_ctr_offset = get_rel_offset((uintptr_t)&uprobe_ref_ctr); + if (!ASSERT_GE(ref_ctr_offset, 0, "ref_ctr_offset")) + return; + + ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_before"); + + uprobe_opts.retprobe = false; + uprobe_opts.ref_ctr_offset = ref_ctr_offset; + uprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe_ref_ctr, + 0 /* self pid */, + "/proc/self/exe", + uprobe_offset, + &uprobe_opts); + if (!ASSERT_OK_PTR(uprobe_link, "attach_uprobe_ref_ctr")) + return; + skel->links.handle_uprobe_ref_ctr = uprobe_link; + + ASSERT_GT(uprobe_ref_ctr, 0, "uprobe_ref_ctr_after"); + + /* if uprobe uses ref_ctr, uretprobe has to use ref_ctr as well */ + uprobe_opts.retprobe = true; + uprobe_opts.ref_ctr_offset = ref_ctr_offset; + uretprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe_ref_ctr, + -1 /* any pid */, + "/proc/self/exe", + uprobe_offset, &uprobe_opts); + if (!ASSERT_OK_PTR(uretprobe_link, "attach_uretprobe_ref_ctr")) + return; + skel->links.handle_uretprobe_ref_ctr = uretprobe_link; +} + +static void test_kprobe_sleepable(void) +{ + struct test_attach_kprobe_sleepable *skel; + + skel = test_attach_kprobe_sleepable__open(); + if (!ASSERT_OK_PTR(skel, "skel_kprobe_sleepable_open")) + return; + + /* sleepable kprobe test case needs flags set before loading */ + if (!ASSERT_OK(bpf_program__set_flags(skel->progs.handle_kprobe_sleepable, + BPF_F_SLEEPABLE), "kprobe_sleepable_flags")) + goto cleanup; + + if (!ASSERT_OK(test_attach_kprobe_sleepable__load(skel), + "skel_kprobe_sleepable_load")) goto cleanup; /* sleepable kprobes should not attach successfully */ skel->links.handle_kprobe_sleepable = bpf_program__attach(skel->progs.handle_kprobe_sleepable); - if (!ASSERT_ERR_PTR(skel->links.handle_kprobe_sleepable, "attach_kprobe_sleepable")) - goto cleanup; + ASSERT_ERR_PTR(skel->links.handle_kprobe_sleepable, "attach_kprobe_sleepable"); +cleanup: + test_attach_kprobe_sleepable__destroy(skel); +} + +static void test_uprobe_sleepable(struct test_attach_probe *skel) +{ /* test sleepable uprobe and uretprobe variants */ skel->links.handle_uprobe_byname3_sleepable = bpf_program__attach(skel->progs.handle_uprobe_byname3_sleepable); if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname3_sleepable, "attach_uprobe_byname3_sleepable")) - goto cleanup; + return; skel->links.handle_uprobe_byname3 = bpf_program__attach(skel->progs.handle_uprobe_byname3); if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname3, 
"attach_uprobe_byname3")) - goto cleanup; + return; skel->links.handle_uretprobe_byname3_sleepable = bpf_program__attach(skel->progs.handle_uretprobe_byname3_sleepable); if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname3_sleepable, "attach_uretprobe_byname3_sleepable")) - goto cleanup; + return; skel->links.handle_uretprobe_byname3 = bpf_program__attach(skel->progs.handle_uretprobe_byname3); if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname3, "attach_uretprobe_byname3")) - goto cleanup; + return; skel->bss->user_ptr = test_data; - /* trigger & validate kprobe && kretprobe */ - usleep(1); - - /* trigger & validate shared library u[ret]probes attached by name */ - devnull = fopen("/dev/null", "r"); - fclose(devnull); - - /* trigger & validate uprobe & uretprobe */ - trigger_func(); - - /* trigger & validate uprobe attached by name */ - trigger_func2(); - /* trigger & validate sleepable uprobe attached by name */ trigger_func3(); - ASSERT_EQ(skel->bss->kprobe_res, 1, "check_kprobe_res"); - ASSERT_EQ(skel->bss->kprobe2_res, 11, "check_kprobe_auto_res"); - ASSERT_EQ(skel->bss->kretprobe_res, 2, "check_kretprobe_res"); - ASSERT_EQ(skel->bss->kretprobe2_res, 22, "check_kretprobe_auto_res"); - ASSERT_EQ(skel->bss->uprobe_res, 3, "check_uprobe_res"); - ASSERT_EQ(skel->bss->uretprobe_res, 4, "check_uretprobe_res"); - ASSERT_EQ(skel->bss->uprobe_byname_res, 5, "check_uprobe_byname_res"); - ASSERT_EQ(skel->bss->uretprobe_byname_res, 6, "check_uretprobe_byname_res"); - ASSERT_EQ(skel->bss->uprobe_byname2_res, 7, "check_uprobe_byname2_res"); - ASSERT_EQ(skel->bss->uretprobe_byname2_res, 8, "check_uretprobe_byname2_res"); ASSERT_EQ(skel->bss->uprobe_byname3_sleepable_res, 9, "check_uprobe_byname3_sleepable_res"); ASSERT_EQ(skel->bss->uprobe_byname3_res, 10, "check_uprobe_byname3_res"); ASSERT_EQ(skel->bss->uretprobe_byname3_sleepable_res, 11, "check_uretprobe_byname3_sleepable_res"); ASSERT_EQ(skel->bss->uretprobe_byname3_res, 12, "check_uretprobe_byname3_res"); +} + +void test_attach_probe(void) +{ + struct test_attach_probe *skel; + + skel = test_attach_probe__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + if (!ASSERT_OK(test_attach_probe__load(skel), "skel_load")) + goto cleanup; + if (!ASSERT_OK_PTR(skel->bss, "check_bss")) + goto cleanup; + + if (test__start_subtest("manual")) + test_attach_probe_manual(skel); + if (test__start_subtest("auto")) + test_attach_probe_auto(skel); + if (test__start_subtest("kprobe-sleepable")) + test_kprobe_sleepable(); + if (test__start_subtest("uprobe-lib")) + test_uprobe_lib(skel); + if (test__start_subtest("uprobe-sleepable")) + test_uprobe_sleepable(skel); + if (test__start_subtest("uprobe-ref_ctr")) + test_uprobe_ref_ctr(skel); cleanup: test_attach_probe__destroy(skel); diff --git a/tools/testing/selftests/bpf/progs/test_attach_kprobe_sleepable.c b/tools/testing/selftests/bpf/progs/test_attach_kprobe_sleepable.c new file mode 100644 index 0000000000000..f548b74462189 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_attach_kprobe_sleepable.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2017 Facebook + +#include "vmlinux.h" +#include +#include +#include +#include "bpf_misc.h" + +int kprobe_res = 0; + +/** + * This program will be manually made sleepable on the userspace side + * and should thus be unattachable. 
+ */ +SEC("kprobe/" SYS_PREFIX "sys_nanosleep") +int handle_kprobe_sleepable(struct pt_regs *ctx) +{ + kprobe_res = 1; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c index 3b5dc34d23e99..9e1e7163bb676 100644 --- a/tools/testing/selftests/bpf/progs/test_attach_probe.c +++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c @@ -37,17 +37,6 @@ int BPF_KSYSCALL(handle_kprobe_auto, struct __kernel_timespec *req, struct __ker return 0; } -/** - * This program will be manually made sleepable on the userspace side - * and should thus be unattachable. - */ -SEC("kprobe/" SYS_PREFIX "sys_nanosleep") -int handle_kprobe_sleepable(struct pt_regs *ctx) -{ - kprobe_res = 2; - return 0; -} - SEC("kretprobe") int handle_kretprobe(struct pt_regs *ctx) { @@ -76,6 +65,18 @@ int handle_uretprobe(struct pt_regs *ctx) return 0; } +SEC("uprobe") +int handle_uprobe_ref_ctr(struct pt_regs *ctx) +{ + return 0; +} + +SEC("uretprobe") +int handle_uretprobe_ref_ctr(struct pt_regs *ctx) +{ + return 0; +} + SEC("uprobe") int handle_uprobe_byname(struct pt_regs *ctx) { -- GitLab From c7aec81b31e43a0aa94ee55d9bb33d70b1046f76 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Mon, 6 Mar 2023 14:48:33 +0800 Subject: [PATCH 0145/2078] selftests/bpf: Add test for legacy/perf kprobe/uprobe attach mode Add the testing for kprobe/uprobe attaching in default, legacy, perf and link mode. And the testing passed: ./test_progs -t attach_probe $5/1 attach_probe/manual-default:OK $5/2 attach_probe/manual-legacy:OK $5/3 attach_probe/manual-perf:OK $5/4 attach_probe/manual-link:OK $5/5 attach_probe/auto:OK $5/6 attach_probe/kprobe-sleepable:OK $5/7 attach_probe/uprobe-lib:OK $5/8 attach_probe/uprobe-sleepable:OK $5/9 attach_probe/uprobe-ref_ctr:OK $5 attach_probe:OK Summary: 1/9 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Menglong Dong Signed-off-by: Andrii Nakryiko Reviewed-by: Biao Jiang Link: https://lore.kernel.org/bpf/20230306064833.7932-4-imagedong@tencent.com --- .../selftests/bpf/prog_tests/attach_probe.c | 37 +++++++++---- .../selftests/bpf/progs/test_attach_probe.c | 32 ----------- .../bpf/progs/test_attach_probe_manual.c | 53 +++++++++++++++++++ 3 files changed, 81 insertions(+), 41 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/test_attach_probe_manual.c diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index c374759f39ce0..7175af39134f3 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include "test_attach_kprobe_sleepable.skel.h" +#include "test_attach_probe_manual.skel.h" #include "test_attach_probe.skel.h" /* this is how USDT semaphore is actually defined, except volatile modifier */ @@ -33,33 +34,43 @@ static noinline void trigger_func4(void) static char test_data[] = "test_data"; /* manual attach kprobe/kretprobe/uprobe/uretprobe testings */ -static void test_attach_probe_manual(struct test_attach_probe *skel) +static void test_attach_probe_manual(enum probe_attach_mode attach_mode) { DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); + DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts); struct bpf_link *kprobe_link, *kretprobe_link; struct bpf_link *uprobe_link, *uretprobe_link; + struct test_attach_probe_manual *skel; ssize_t uprobe_offset; + skel = 
test_attach_probe_manual__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_kprobe_manual_open_and_load")) + return; + uprobe_offset = get_uprobe_offset(&trigger_func); if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset")) goto cleanup; /* manual-attach kprobe/kretprobe */ - kprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kprobe, - false /* retprobe */, - SYS_NANOSLEEP_KPROBE_NAME); + kprobe_opts.attach_mode = attach_mode; + kprobe_opts.retprobe = false; + kprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe, + SYS_NANOSLEEP_KPROBE_NAME, + &kprobe_opts); if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe")) goto cleanup; skel->links.handle_kprobe = kprobe_link; - kretprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kretprobe, - true /* retprobe */, - SYS_NANOSLEEP_KPROBE_NAME); + kprobe_opts.retprobe = true; + kretprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe, + SYS_NANOSLEEP_KPROBE_NAME, + &kprobe_opts); if (!ASSERT_OK_PTR(kretprobe_link, "attach_kretprobe")) goto cleanup; skel->links.handle_kretprobe = kretprobe_link; /* manual-attach uprobe/uretprobe */ + uprobe_opts.attach_mode = attach_mode; uprobe_opts.ref_ctr_offset = 0; uprobe_opts.retprobe = false; uprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe, @@ -108,6 +119,7 @@ static void test_attach_probe_manual(struct test_attach_probe *skel) ASSERT_EQ(skel->bss->uprobe_byname_res, 5, "check_uprobe_byname_res"); cleanup: + test_attach_probe_manual__destroy(skel); } static void test_attach_probe_auto(struct test_attach_probe *skel) @@ -289,8 +301,15 @@ void test_attach_probe(void) if (!ASSERT_OK_PTR(skel->bss, "check_bss")) goto cleanup; - if (test__start_subtest("manual")) - test_attach_probe_manual(skel); + if (test__start_subtest("manual-default")) + test_attach_probe_manual(PROBE_ATTACH_MODE_DEFAULT); + if (test__start_subtest("manual-legacy")) + test_attach_probe_manual(PROBE_ATTACH_MODE_LEGACY); + if (test__start_subtest("manual-perf")) + test_attach_probe_manual(PROBE_ATTACH_MODE_PERF); + if (test__start_subtest("manual-link")) + test_attach_probe_manual(PROBE_ATTACH_MODE_LINK); + if (test__start_subtest("auto")) test_attach_probe_auto(skel); if (test__start_subtest("kprobe-sleepable")) diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c index 9e1e7163bb676..68466a6ad18c7 100644 --- a/tools/testing/selftests/bpf/progs/test_attach_probe.c +++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c @@ -7,12 +7,8 @@ #include #include "bpf_misc.h" -int kprobe_res = 0; int kprobe2_res = 0; -int kretprobe_res = 0; int kretprobe2_res = 0; -int uprobe_res = 0; -int uretprobe_res = 0; int uprobe_byname_res = 0; int uretprobe_byname_res = 0; int uprobe_byname2_res = 0; @@ -23,13 +19,6 @@ int uretprobe_byname3_sleepable_res = 0; int uretprobe_byname3_res = 0; void *user_ptr = 0; -SEC("kprobe") -int handle_kprobe(struct pt_regs *ctx) -{ - kprobe_res = 1; - return 0; -} - SEC("ksyscall/nanosleep") int BPF_KSYSCALL(handle_kprobe_auto, struct __kernel_timespec *req, struct __kernel_timespec *rem) { @@ -37,13 +26,6 @@ int BPF_KSYSCALL(handle_kprobe_auto, struct __kernel_timespec *req, struct __ker return 0; } -SEC("kretprobe") -int handle_kretprobe(struct pt_regs *ctx) -{ - kretprobe_res = 2; - return 0; -} - SEC("kretsyscall/nanosleep") int BPF_KRETPROBE(handle_kretprobe_auto, int ret) { @@ -51,20 +33,6 @@ int BPF_KRETPROBE(handle_kretprobe_auto, int ret) return ret; } -SEC("uprobe") -int 
handle_uprobe(struct pt_regs *ctx) -{ - uprobe_res = 3; - return 0; -} - -SEC("uretprobe") -int handle_uretprobe(struct pt_regs *ctx) -{ - uretprobe_res = 4; - return 0; -} - SEC("uprobe") int handle_uprobe_ref_ctr(struct pt_regs *ctx) { diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe_manual.c b/tools/testing/selftests/bpf/progs/test_attach_probe_manual.c new file mode 100644 index 0000000000000..7f08bce94596f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_attach_probe_manual.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2017 Facebook + +#include "vmlinux.h" +#include +#include +#include +#include "bpf_misc.h" + +int kprobe_res = 0; +int kretprobe_res = 0; +int uprobe_res = 0; +int uretprobe_res = 0; +int uprobe_byname_res = 0; +void *user_ptr = 0; + +SEC("kprobe") +int handle_kprobe(struct pt_regs *ctx) +{ + kprobe_res = 1; + return 0; +} + +SEC("kretprobe") +int handle_kretprobe(struct pt_regs *ctx) +{ + kretprobe_res = 2; + return 0; +} + +SEC("uprobe") +int handle_uprobe(struct pt_regs *ctx) +{ + uprobe_res = 3; + return 0; +} + +SEC("uretprobe") +int handle_uretprobe(struct pt_regs *ctx) +{ + uretprobe_res = 4; + return 0; +} + +SEC("uprobe") +int handle_uprobe_byname(struct pt_regs *ctx) +{ + uprobe_byname_res = 5; + return 0; +} + + +char _license[] SEC("license") = "GPL"; -- GitLab From d509c55cda22096a1836e35b03f66e1ef411c0c2 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Wed, 1 Mar 2023 12:09:13 +0200 Subject: [PATCH 0146/2078] wifi: nl80211: Update the documentation of NL80211_SCAN_FLAG_COLOCATED_6GHZ Add a detailed description of NL80211_SCAN_FLAG_COLOCATED_6GHZ flag. Signed-off-by: Ilan Peer Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230301115906.487ab04feb39.I5129fd61841332474693046241586f057b134c3c@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f14621a954e10..c22eeb18b9967 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -6510,8 +6510,14 @@ enum nl80211_timeout_reason { * @NL80211_SCAN_FLAG_FREQ_KHZ: report scan results with * %NL80211_ATTR_SCAN_FREQ_KHZ. This also means * %NL80211_ATTR_SCAN_FREQUENCIES will not be included. - * @NL80211_SCAN_FLAG_COLOCATED_6GHZ: scan for colocated APs reported by - * 2.4/5 GHz APs + * @NL80211_SCAN_FLAG_COLOCATED_6GHZ: scan for collocated APs reported by + * 2.4/5 GHz APs. When the flag is set, the scan logic will use the + * information from the RNR element found in beacons/probe responses + * received on the 2.4/5 GHz channels to actively scan only the 6GHz + * channels on which APs are expected to be found. Note that when not set, + * the scan logic would scan all 6GHz channels, but since transmission of + * probe requests on non PSC channels is limited, it is highly likely that + * these channels would passively be scanned. */ enum nl80211_scan_flags { NL80211_SCAN_FLAG_LOW_PRIORITY = 1<<0, -- GitLab From 0fd3af61731567dc0ad798c88c20f6d63fe331ea Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Mar 2023 12:09:14 +0200 Subject: [PATCH 0147/2078] wifi: mac80211: adjust scan cancel comment/check Instead of the comment about holding RTNL, which is now wrong, add a proper lockdep assertion for the wiphy mutex. 
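For illustration only (this sketch is not part of the patch): with the new lockdep assertion, a caller is expected to hold the wiphy mutex, e.g. via wiphy_lock()/wiphy_unlock(), rather than the RTNL. The callsite below, drv_example_abort_scan(), is hypothetical.

static void drv_example_abort_scan(struct ieee80211_local *local)
{
	/* satisfies the lockdep_assert_wiphy() added by this patch */
	wiphy_lock(local->hw.wiphy);
	ieee80211_scan_cancel(local);
	wiphy_unlock(local->hw.wiphy);
}
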
Fixes: a05829a7222e ("cfg80211: avoid holding the RTNL when calling the driver") Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230301115906.84352e46f342.Id90fef8c581cebe19cb30274340cf43885d55c74@changeid Signed-off-by: Johannes Berg --- net/mac80211/scan.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index dc3cdee51e660..32fa8aca70056 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu * Copyright 2013-2015 Intel Mobile Communications GmbH * Copyright 2016-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation */ #include @@ -1246,11 +1246,11 @@ int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, return ret; } -/* - * Only call this function when a scan can't be queued -- under RTNL. - */ void ieee80211_scan_cancel(struct ieee80211_local *local) { + /* ensure a new scan cannot be queued */ + lockdep_assert_wiphy(local->hw.wiphy); + /* * We are canceling software scan, or deferred scan that was not * yet really started (see __ieee80211_start_scan ). -- GitLab From 3a867c7eef812ad4684dccd8825f1099a60b8c98 Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Wed, 1 Mar 2023 12:09:15 +0200 Subject: [PATCH 0148/2078] wifi: mac80211: clear all bits that relate rtap fields on skb Since we remove radiotap from skb data, clear all RX_FLAG_X related info that indicate info on the skb data. Also we need to do it only once so remove the clear from cooked_monitor. Signed-off-by: Mordechay Goodstein Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230301115906.74d3efe19eae.Ie17a35864d2e120f9858516a2e3d3047d83cf805@changeid Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index f7fdfe710951f..095bcd2552bb0 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -43,6 +43,7 @@ static struct sk_buff *ieee80211_clean_skb(struct sk_buff *skb, unsigned int present_fcs_len, unsigned int rtap_space) { + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_hdr *hdr; unsigned int hdrlen; __le16 fc; @@ -51,6 +52,14 @@ static struct sk_buff *ieee80211_clean_skb(struct sk_buff *skb, __pskb_trim(skb, skb->len - present_fcs_len); pskb_pull(skb, rtap_space); + /* After pulling radiotap header, clear all flags that indicate + * info in skb->data. + */ + status->flag &= ~(RX_FLAG_RADIOTAP_VENDOR_DATA | + RX_FLAG_RADIOTAP_LSIG | + RX_FLAG_RADIOTAP_HE_MU | + RX_FLAG_RADIOTAP_HE); + hdr = (void *)skb->data; fc = hdr->frame_control; @@ -3916,8 +3925,6 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx, if (!local->cooked_mntrs) goto out_free_skb; - /* vendor data is long removed here */ - status->flag &= ~RX_FLAG_RADIOTAP_VENDOR_DATA; /* room for the radiotap header based on driver features */ needed_headroom = ieee80211_rx_radiotap_hdrlen(local, status, skb); -- GitLab From 3ffcc659dc6fa84da7db57f56fd48756c5a60fba Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Mar 2023 12:09:16 +0200 Subject: [PATCH 0149/2078] wifi: mac80211: check key taint for beacon protection This will likely never happen, but for completeness check the key taint flag before using a key for beacon protection. 
Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230301115906.cf2c3fee6f1f.I2f19b3e04e31c99bed3c9dc71935bf513b2cd177@changeid Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 7699fb4106701..1f9294f5baa49 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -5115,6 +5115,12 @@ static int ieee80211_beacon_protect(struct sk_buff *skb, tx.key = rcu_dereference(link->default_beacon_key); if (!tx.key) return 0; + + if (unlikely(tx.key->flags & KEY_FLAG_TAINTED)) { + tx.key = NULL; + return -EINVAL; + } + tx.local = local; tx.sdata = sdata; __skb_queue_head_init(&tx.skbs); -- GitLab From d1b9bb6520fe0c295128229f5d3a9298127d5c8a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Mar 2023 12:09:17 +0200 Subject: [PATCH 0150/2078] wifi: mac80211: allow beacon protection HW offload In case of beacon protection, check if the key was offloaded to the hardware and in that case set control.hw_key so that the encryption function will see it and only do the needed steps that aren't done in hardware. Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230301115906.b2becd9a22fb.I6c0b9c50c6a481128ba912a11cb7afc92c4b6da7@changeid Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 1f9294f5baa49..628d60f3db2ab 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -5121,6 +5121,10 @@ static int ieee80211_beacon_protect(struct sk_buff *skb, return -EINVAL; } + if (!(tx.key->conf.flags & IEEE80211_KEY_FLAG_SW_MGMT_TX) && + tx.key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) + IEEE80211_SKB_CB(skb)->control.hw_key = &tx.key->conf; + tx.local = local; tx.sdata = sdata; __skb_queue_head_init(&tx.skbs); -- GitLab From 724a486cd24581928f4a82619806939119304645 Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Wed, 1 Mar 2023 12:09:18 +0200 Subject: [PATCH 0151/2078] wifi: wireless: return primary channel regardless of DUP Currently in case DUP bit is not set we don't return the primary channel for 6 GHz Band, but the spec says that the DUP bit has no effect on this field: 9.4.2.249 HE Operation element: The Duplicate Beacon subfield is set to 1 if the AP transmits Beacon frames in non-HT duplicate PPDU with a TXVECTOR parameter CH_BANDWIDTH value that is up to the BSS bandwidth and is set to 0 otherwise. So remove the condition for returning primary channel based on DUP. Since the caller code already marks the signal as invalid in case the indicated frequency is not the tuned frequency, there's no need to additionally handle this case here since that's already true for duplicated beacons on the non-primary channel(s). 
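For illustration only (this sketch is not part of the patch): once the DUP condition is removed, the primary channel from the HE 6 GHz Operation element is always reported, and it is left to the caller to compare it against the channel the frame was actually received on. Here rx_channel is a hypothetical variable standing in for that receive channel.

	u32 freq = ieee80211_channel_to_freq_khz(he_6ghz_oper->primary,
						 NL80211_BAND_6GHZ);

	if (freq != ieee80211_channel_to_khz(rx_channel)) {
		/* duplicated beacon heard off the primary channel:
		 * the reported signal is not trusted for this BSS
		 */
	}
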
Signed-off-by: Mordechay Goodstein Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230301115906.66d7f05f7d11.I5e0add054f72ede95611391b99804c61c40cc959@changeid [clarify commit message] Signed-off-by: Johannes Berg --- net/wireless/scan.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 790bc31cf82ea..72e9af6158efd 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1830,9 +1830,7 @@ int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen, if (!he_6ghz_oper) return -1; - if (ftype != CFG80211_BSS_FTYPE_BEACON || - he_6ghz_oper->control & IEEE80211_HE_6GHZ_OPER_CTRL_DUP_BEACON) - return he_6ghz_oper->primary; + return he_6ghz_oper->primary; } } else if (band == NL80211_BAND_S1GHZ) { tmp = cfg80211_find_elem(WLAN_EID_S1G_OPERATION, ie, ielen); -- GitLab From fb4b441c5d764813370e381310d9f82b38e7cd3f Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Wed, 1 Mar 2023 12:09:19 +0200 Subject: [PATCH 0152/2078] wifi: wireless: correct primary channel validation on 6 GHz The check that beacon primary channel is in the range of 80 MHz (abs < 80) is invalid for 320 MHz since duplicate beacon transmit means that the AP transmits it on all the 20 MHz sub-channels: 9.4.2.249 HE Operation element - ... AP transmits Beacon frames in non-HT duplicate PPDU with a TXVECTOR parameter CH_BANDWIDTH value that is up to the BSS bandwidth. So in case of 320 MHz the DUP beacon can be in upper 160 for primary channel in the lower 160 giving possibly an absolute range of over 80 MHz. Also this check is redundant alltogether, if AP has a wrong primary channel in the beacon it's a faulty AP, and we would fail in next steps to connect. While at it, fix the frequency comparison to no longer compare between KHz and MHz, which was introduced by commit 7f599aeccbd2 ("cfg80211: Use the HE operation IE to determine a 6GHz BSS channel"). Signed-off-by: Mordechay Goodstein Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230301115906.314faf725255.I5e27251ac558297553b590d3917a7b6d1aae0e74@changeid Signed-off-by: Johannes Berg --- net/wireless/scan.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 72e9af6158efd..c23709d852bc7 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1886,22 +1886,21 @@ cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen, freq = ieee80211_channel_to_freq_khz(channel_number, channel->band); /* - * In 6GHz, duplicated beacon indication is relevant for - * beacons only. + * Frame info (beacon/prob res) is the same as received channel, + * no need for further processing. */ - if (channel->band == NL80211_BAND_6GHZ && - (freq == channel->center_freq || - abs(freq - channel->center_freq) > 80)) + if (freq == ieee80211_channel_to_khz(channel)) return channel; alt_channel = ieee80211_get_channel_khz(wiphy, freq); if (!alt_channel) { - if (channel->band == NL80211_BAND_2GHZ) { + if (channel->band == NL80211_BAND_2GHZ || + channel->band == NL80211_BAND_6GHZ) { /* * Better not allow unexpected channels when that could * be going beyond the 1-11 range (e.g., discovering * BSS on channel 12 when radio is configured for - * channel 11. + * channel 11) or beyond the 6 GHz channel range. 
*/ return NULL; } -- GitLab From 6ff9efcfc2dc256480b252321818e0111b9399a2 Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Wed, 1 Mar 2023 12:09:20 +0200 Subject: [PATCH 0153/2078] wifi: wireless: cleanup unused function parameters In the past ftype was used for deciding about 6G DUP beacon, but the logic was removed and ftype is not needed anymore. Signed-off-by: Mordechay Goodstein Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230301115906.98d4761b809b.I255f5ecd77cb24fcf2f1641bb5833ea2d121296e@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 +--- net/wireless/scan.c | 21 +++++---------------- 2 files changed, 6 insertions(+), 19 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f115b25503096..03b911abd7720 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -6814,13 +6814,11 @@ enum cfg80211_bss_frame_type { * @ie: IEs * @ielen: length of IEs * @band: enum nl80211_band of the channel - * @ftype: frame type * * Returns the channel number, or -1 if none could be determined. */ int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen, - enum nl80211_band band, - enum cfg80211_bss_frame_type ftype); + enum nl80211_band band); /** * cfg80211_inform_bss_data - inform cfg80211 of a new BSS diff --git a/net/wireless/scan.c b/net/wireless/scan.c index c23709d852bc7..a1382255fab35 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1810,8 +1810,7 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, } int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen, - enum nl80211_band band, - enum cfg80211_bss_frame_type ftype) + enum nl80211_band band) { const struct element *tmp; @@ -1868,15 +1867,14 @@ EXPORT_SYMBOL(cfg80211_get_ies_channel_number); static struct ieee80211_channel * cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen, struct ieee80211_channel *channel, - enum nl80211_bss_scan_width scan_width, - enum cfg80211_bss_frame_type ftype) + enum nl80211_bss_scan_width scan_width) { u32 freq; int channel_number; struct ieee80211_channel *alt_channel; channel_number = cfg80211_get_ies_channel_number(ie, ielen, - channel->band, ftype); + channel->band); if (channel_number < 0) { /* No channel information in frame payload */ @@ -1954,7 +1952,7 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, return NULL; channel = cfg80211_get_bss_channel(wiphy, ie, ielen, data->chan, - data->scan_width, ftype); + data->scan_width); if (!channel) return NULL; @@ -2388,7 +2386,6 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy, size_t ielen, min_hdr_len = offsetof(struct ieee80211_mgmt, u.probe_resp.variable); int bss_type; - enum cfg80211_bss_frame_type ftype; BUILD_BUG_ON(offsetof(struct ieee80211_mgmt, u.probe_resp.variable) != offsetof(struct ieee80211_mgmt, u.beacon.variable)); @@ -2425,16 +2422,8 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy, variable = ext->u.s1g_beacon.variable; } - if (ieee80211_is_beacon(mgmt->frame_control)) - ftype = CFG80211_BSS_FTYPE_BEACON; - else if (ieee80211_is_probe_resp(mgmt->frame_control)) - ftype = CFG80211_BSS_FTYPE_PRESP; - else - ftype = CFG80211_BSS_FTYPE_UNKNOWN; - channel = cfg80211_get_bss_channel(wiphy, variable, - ielen, data->chan, data->scan_width, - ftype); + ielen, data->chan, data->scan_width); if (!channel) return NULL; -- GitLab From cbbaf2bb829b6c4ef911d4a725fc9b1fadc1e43f Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Wed, 1 Mar 2023 12:09:21 +0200 Subject: [PATCH 0154/2078] 
wifi: nl80211: add a command to enable/disable HW timestamping Add a command to enable and disable HW timestamping of TM and FTM frames. HW timestamping can be enabled for a specific mac address or for all addresses. The low level driver will indicate how many peers HW timestamping can be enabled concurrently, and this information will be passed to userspace. Signed-off-by: Avraham Stern Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230301115906.05678d7b1c17.Iccc08869ea8156f1c71a3111a47f86dd56234bd0@changeid [switch to needing netdev UP, minor edits] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 27 ++++++++++++++++++++++++++ include/uapi/linux/nl80211.h | 22 +++++++++++++++++++++ net/wireless/nl80211.c | 37 ++++++++++++++++++++++++++++++++++++ net/wireless/rdev-ops.h | 17 +++++++++++++++++ net/wireless/trace.h | 25 ++++++++++++++++++++++++ 5 files changed, 128 insertions(+) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 03b911abd7720..f0da61c6ec4bc 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -827,6 +827,18 @@ struct cfg80211_fils_aad { const u8 *anonce; }; +/** + * struct cfg80211_set_hw_timestamp - enable/disable HW timestamping + * @macaddr: peer MAC address. NULL to enable/disable HW timestamping for all + * addresses. + * @enable: if set, enable HW timestamping for the specified MAC address. + * Otherwise disable HW timestamping for the specified MAC address. + */ +struct cfg80211_set_hw_timestamp { + const u8 *macaddr; + bool enable; +}; + /** * cfg80211_get_chandef_type - return old channel type from chandef * @chandef: the channel definition @@ -4330,6 +4342,8 @@ struct mgmt_frame_regs { * @add_link_station: Add a link to a station. * @mod_link_station: Modify a link of a station. * @del_link_station: Remove a link of a station. + * + * @set_hw_timestamp: Enable/disable HW timestamping of TM/FTM frames. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -4683,6 +4697,8 @@ struct cfg80211_ops { struct link_station_parameters *params); int (*del_link_station)(struct wiphy *wiphy, struct net_device *dev, struct link_station_del_parameters *params); + int (*set_hw_timestamp)(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_set_hw_timestamp *hwts); }; /* @@ -5139,6 +5155,8 @@ struct wiphy_iftype_akm_suites { int n_akm_suites; }; +#define CFG80211_HW_TIMESTAMP_ALL_PEERS 0xffff + /** * struct wiphy - wireless hardware description * @mtx: mutex for the data (structures) of this device @@ -5348,6 +5366,13 @@ struct wiphy_iftype_akm_suites { * NL80211_MAX_NR_AKM_SUITES in order to avoid compatibility issues with * legacy userspace and maximum allowed value is * CFG80211_MAX_NUM_AKM_SUITES. + * + * @hw_timestamp_max_peers: maximum number of peers that the driver supports + * enabling HW timestamping for concurrently. Setting this field to a + * non-zero value indicates that the driver supports HW timestamping. + * A value of %CFG80211_HW_TIMESTAMP_ALL_PEERS indicates the driver + * supports enabling HW timestamping for all peers (i.e. no need to + * specify a mac address). 
*/ struct wiphy { struct mutex mtx; @@ -5496,6 +5521,8 @@ struct wiphy { u8 ema_max_profile_periodicity; u16 max_num_akm_suites; + u16 hw_timestamp_max_peers; + char priv[] __aligned(NETDEV_ALIGN); }; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index c22eeb18b9967..c8520c150f9c4 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1299,6 +1299,16 @@ * @NL80211_CMD_MODIFY_LINK_STA: Modify a link of an MLD station * @NL80211_CMD_REMOVE_LINK_STA: Remove a link of an MLD station * + * @NL80211_CMD_SET_HW_TIMESTAMP: Enable/disable HW timestamping of Timing + * measurement and Fine timing measurement frames. If %NL80211_ATTR_MAC + * is included, enable/disable HW timestamping only for frames to/from the + * specified MAC address. Otherwise enable/disable HW timestamping for + * all TM/FTM frames (including ones that were enabled with specific MAC + * address). If %NL80211_ATTR_HW_TIMESTAMP_ENABLED is not included, disable + * HW timestamping. + * The number of peers that HW timestamping can be enabled for concurrently + * is indicated by %NL80211_ATTR_MAX_HW_TIMESTAMP_PEERS. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1550,6 +1560,8 @@ enum nl80211_commands { NL80211_CMD_MODIFY_LINK_STA, NL80211_CMD_REMOVE_LINK_STA, + NL80211_CMD_SET_HW_TIMESTAMP, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -2775,6 +2787,13 @@ enum nl80211_commands { * indicates that the sub-channel is punctured. Higher 16 bits are * reserved. * + * @NL80211_ATTR_MAX_HW_TIMESTAMP_PEERS: Maximum number of peers that HW + * timestamping can be enabled for concurrently (u16), a wiphy attribute. + * A value of 0xffff indicates setting for all peers (i.e. not specifying + * an address with %NL80211_CMD_SET_HW_TIMESTAMP) is supported. + * @NL80211_ATTR_HW_TIMESTAMP_ENABLED: Indicates whether HW timestamping should + * be enabled or not (flag attribute). 
+ * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3306,6 +3325,9 @@ enum nl80211_attrs { NL80211_ATTR_PUNCT_BITMAP, + NL80211_ATTR_MAX_HW_TIMESTAMP_PEERS, + NL80211_ATTR_HW_TIMESTAMP_ENABLED, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 112b4bb009c80..ab0497efdd372 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -806,6 +806,9 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_MLO_SUPPORT] = { .type = NLA_FLAG }, [NL80211_ATTR_MAX_NUM_AKM_SUITES] = { .type = NLA_REJECT }, [NL80211_ATTR_PUNCT_BITMAP] = NLA_POLICY_RANGE(NLA_U8, 0, 0xffff), + + [NL80211_ATTR_MAX_HW_TIMESTAMP_PEERS] = { .type = NLA_U16 }, + [NL80211_ATTR_HW_TIMESTAMP_ENABLED] = { .type = NLA_FLAG }, }; /* policy for the key attributes */ @@ -2964,6 +2967,11 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_MLO) nla_put_flag(msg, NL80211_ATTR_MLO_SUPPORT); + if (rdev->wiphy.hw_timestamp_max_peers && + nla_put_u16(msg, NL80211_ATTR_MAX_HW_TIMESTAMP_PEERS, + rdev->wiphy.hw_timestamp_max_peers)) + goto nla_put_failure; + /* done */ state->split_start = 0; break; @@ -16162,6 +16170,29 @@ nl80211_remove_link_station(struct sk_buff *skb, struct genl_info *info) return ret; } +static int nl80211_set_hw_timestamp(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct cfg80211_set_hw_timestamp hwts = {}; + + if (!rdev->wiphy.hw_timestamp_max_peers) + return -EOPNOTSUPP; + + if (!info->attrs[NL80211_ATTR_MAC] && + rdev->wiphy.hw_timestamp_max_peers != CFG80211_HW_TIMESTAMP_ALL_PEERS) + return -EOPNOTSUPP; + + if (info->attrs[NL80211_ATTR_MAC]) + hwts.macaddr = nla_data(info->attrs[NL80211_ATTR_MAC]); + + hwts.enable = + nla_get_flag(info->attrs[NL80211_ATTR_HW_TIMESTAMP_ENABLED]); + + return rdev_set_hw_timestamp(rdev, dev, &hwts); +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -17336,6 +17367,12 @@ static const struct genl_small_ops nl80211_small_ops[] = { .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_MLO_VALID_LINK_ID), }, + { + .cmd = NL80211_CMD_SET_HW_TIMESTAMP, + .doit = nl80211_set_hw_timestamp, + .flags = GENL_UNS_ADMIN_PERM, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), + }, }; static struct genl_family nl80211_fam __ro_after_init = { diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 13b209a8db287..2e497cf26ef28 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -1494,4 +1494,21 @@ rdev_del_link_station(struct cfg80211_registered_device *rdev, return ret; } +static inline int +rdev_set_hw_timestamp(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_set_hw_timestamp *hwts) +{ + struct wiphy *wiphy = &rdev->wiphy; + int ret; + + if (!rdev->ops->set_hw_timestamp) + return -EOPNOTSUPP; + + trace_rdev_set_hw_timestamp(wiphy, dev, hwts); + ret = rdev->ops->set_hw_timestamp(wiphy, dev, hwts); + trace_rdev_return_int(wiphy, ret); + + return ret; +} #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/trace.h b/net/wireless/trace.h index ca7474eec7234..f3fcfc4fcce55 100644 --- 
a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -3918,6 +3918,31 @@ TRACE_EVENT(rdev_del_link_station, __entry->link_id) ); +TRACE_EVENT(rdev_set_hw_timestamp, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_set_hw_timestamp *hwts), + + TP_ARGS(wiphy, netdev, hwts), + + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + MAC_ENTRY(macaddr) + __field(bool, enable) + ), + + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + MAC_ASSIGN(macaddr, hwts->macaddr); + __entry->enable = hwts->enable; + ), + + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", mac %pM, enable: %u", + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->macaddr, + __entry->enable) +); + #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH -- GitLab From 81202305f7c282c356c337dded8472d884acd94b Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Wed, 1 Mar 2023 12:09:22 +0200 Subject: [PATCH 0155/2078] wifi: mac80211: add support for set_hw_timestamp command Support the set_hw_timestamp callback for enabling and disabling HW timestamping if the low level driver supports it. Signed-off-by: Avraham Stern Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230301115906.700ded7badde.Ib2f7c228256ce313a04d3d9f9ecc6c7b9aa602bb@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 6 ++++++ net/mac80211/cfg.c | 17 +++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 219fd15893b06..6946c9d95aece 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4230,6 +4230,9 @@ struct ieee80211_prep_tx_info { * Note that a sta can also be inserted or removed with valid links, * i.e. passed to @sta_add/@sta_state with sta->valid_links not zero. * In fact, cannot change from having valid_links and not having them. + * @set_hw_timestamp: Enable/disable HW timestamping of TM/FTM frames. This is + * not restored at HW reset by mac80211 so drivers need to take care of + * that. 
*/ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, @@ -4589,6 +4592,9 @@ struct ieee80211_ops { struct ieee80211_vif *vif, struct ieee80211_sta *sta, u16 old_links, u16 new_links); + int (*set_hw_timestamp)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct cfg80211_set_hw_timestamp *hwts); }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 8eb3423008687..7e90f4a81962c 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -4904,6 +4904,22 @@ ieee80211_del_link_station(struct wiphy *wiphy, struct net_device *dev, return ret; } +static int ieee80211_set_hw_timestamp(struct wiphy *wiphy, + struct net_device *dev, + struct cfg80211_set_hw_timestamp *hwts) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + + if (!local->ops->set_hw_timestamp) + return -EOPNOTSUPP; + + if (!check_sdata_in_driver(sdata)) + return -EIO; + + return local->ops->set_hw_timestamp(&local->hw, &sdata->vif, hwts); +} + const struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -5014,4 +5030,5 @@ const struct cfg80211_ops mac80211_config_ops = { .add_link_station = ieee80211_add_link_station, .mod_link_station = ieee80211_mod_link_station, .del_link_station = ieee80211_del_link_station, + .set_hw_timestamp = ieee80211_set_hw_timestamp, }; -- GitLab From 4c532321bf90288dae6b07a3f52279bfde842a80 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Mar 2023 12:09:23 +0200 Subject: [PATCH 0156/2078] wifi: cfg80211/mac80211: report link ID on control port RX For control port RX, report the link ID for MLO. Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230301115906.fe06dfc3791b.Iddcab94789cafe336417be406072ce8a6312fc2d@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 5 +++-- net/mac80211/rx.c | 2 +- net/wireless/nl80211.c | 15 ++++++++++----- net/wireless/trace.h | 11 +++++++---- 4 files changed, 21 insertions(+), 12 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f0da61c6ec4bc..7cebba1c41356 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -8126,6 +8126,7 @@ void cfg80211_control_port_tx_status(struct wireless_dev *wdev, u64 cookie, * responsible for any cleanup. The caller must also ensure that * skb->protocol is set appropriately. * @unencrypted: Whether the frame was received unencrypted + * @link_id: the link the frame was received on, -1 if not applicable or unknown * * This function is used to inform userspace about a received control port * frame. 
It should only be used if userspace indicated it wants to receive @@ -8136,8 +8137,8 @@ void cfg80211_control_port_tx_status(struct wireless_dev *wdev, u64 cookie, * * Return: %true if the frame was passed to userspace */ -bool cfg80211_rx_control_port(struct net_device *dev, - struct sk_buff *skb, bool unencrypted); +bool cfg80211_rx_control_port(struct net_device *dev, struct sk_buff *skb, + bool unencrypted, int link_id); /** * cfg80211_cqm_rssi_notify - connection quality monitoring rssi event diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 095bcd2552bb0..f63ed6b91d9b5 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2591,7 +2591,7 @@ static void ieee80211_deliver_skb_to_local_stack(struct sk_buff *skb, struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); bool noencrypt = !(status->flag & RX_FLAG_DECRYPTED); - cfg80211_rx_control_port(dev, skb, noencrypt); + cfg80211_rx_control_port(dev, skb, noencrypt, rx->link_id); dev_kfree_skb(skb); } else { struct ethhdr *ehdr = (void *)skb_mac_header(skb); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ab0497efdd372..85f714e1af871 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -18754,7 +18754,9 @@ EXPORT_SYMBOL(cfg80211_mgmt_tx_status_ext); static int __nl80211_rx_control_port(struct net_device *dev, struct sk_buff *skb, - bool unencrypted, gfp_t gfp) + bool unencrypted, + int link_id, + gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); @@ -18786,6 +18788,8 @@ static int __nl80211_rx_control_port(struct net_device *dev, NL80211_ATTR_PAD) || nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) || nla_put_u16(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE, proto) || + (link_id >= 0 && + nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, link_id)) || (unencrypted && nla_put_flag(msg, NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT))) goto nla_put_failure; @@ -18804,13 +18808,14 @@ static int __nl80211_rx_control_port(struct net_device *dev, return -ENOBUFS; } -bool cfg80211_rx_control_port(struct net_device *dev, - struct sk_buff *skb, bool unencrypted) +bool cfg80211_rx_control_port(struct net_device *dev, struct sk_buff *skb, + bool unencrypted, int link_id) { int ret; - trace_cfg80211_rx_control_port(dev, skb, unencrypted); - ret = __nl80211_rx_control_port(dev, skb, unencrypted, GFP_ATOMIC); + trace_cfg80211_rx_control_port(dev, skb, unencrypted, link_id); + ret = __nl80211_rx_control_port(dev, skb, unencrypted, link_id, + GFP_ATOMIC); trace_cfg80211_return_bool(ret == 0); return ret == 0; } diff --git a/net/wireless/trace.h b/net/wireless/trace.h index f3fcfc4fcce55..716a1fa700697 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -3165,14 +3165,15 @@ TRACE_EVENT(cfg80211_control_port_tx_status, TRACE_EVENT(cfg80211_rx_control_port, TP_PROTO(struct net_device *netdev, struct sk_buff *skb, - bool unencrypted), - TP_ARGS(netdev, skb, unencrypted), + bool unencrypted, int link_id), + TP_ARGS(netdev, skb, unencrypted, link_id), TP_STRUCT__entry( NETDEV_ENTRY __field(int, len) MAC_ENTRY(from) __field(u16, proto) __field(bool, unencrypted) + __field(int, link_id) ), TP_fast_assign( NETDEV_ASSIGN; @@ -3180,10 +3181,12 @@ TRACE_EVENT(cfg80211_rx_control_port, MAC_ASSIGN(from, eth_hdr(skb)->h_source); __entry->proto = be16_to_cpu(skb->protocol); __entry->unencrypted = unencrypted; + __entry->link_id = link_id; ), - TP_printk(NETDEV_PR_FMT ", len=%d, %pM, proto: 0x%x, unencrypted: %s", + TP_printk(NETDEV_PR_FMT ", len=%d, 
%pM, proto: 0x%x, unencrypted: %s, link: %d", NETDEV_PR_ARG, __entry->len, __entry->from, - __entry->proto, BOOL_TO_STR(__entry->unencrypted)) + __entry->proto, BOOL_TO_STR(__entry->unencrypted), + __entry->link_id) ); TRACE_EVENT(cfg80211_cqm_rssi_notify, -- GitLab From a1e91ef92392e5da15a1a16f8545ede2c02f7049 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Mar 2023 12:09:24 +0200 Subject: [PATCH 0157/2078] wifi: mac80211: warn only once on AP probe We should perhaps support this API for MLO, but it's not clear that it makes sense, in any case then we'd have to update it to probe the correct BSS. For now, if it happens, warn only once so that we don't get flooded with messages if the driver misbehaves and calls this. Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230301115906.1c8499b6fbe6.I1a76a2be3b42ff93904870ac069f0319507adc23@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 60792dfabc9d6..cdf842c0849b1 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3227,7 +3227,7 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; bool already = false; - if (WARN_ON(sdata->vif.valid_links)) + if (WARN_ON_ONCE(sdata->vif.valid_links)) return; if (!ieee80211_sdata_running(sdata)) -- GitLab From e1f113cc67870375eae0c7b84c2a40cc6388d903 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Wed, 1 Mar 2023 12:09:25 +0200 Subject: [PATCH 0158/2078] wifi: mac80211: add pointer from bss_conf to vif While often not needed, this considerably simplifies going from a link specific bss_config to the vif. This helps with e.g. creating link specific debugfs entries inside drivers. Signed-off-by: Benjamin Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230301115906.46f701a10ed5.I20390b2a8165ff222d66585915689206ea93222b@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 3 +++ net/mac80211/link.c | 1 + 2 files changed, 4 insertions(+) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 6946c9d95aece..2a1874d994e0f 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -534,6 +534,7 @@ struct ieee80211_fils_discovery { * This structure keeps information about a BSS (and an association * to that BSS) that can change during the lifetime of the BSS. 
* + * @vif: reference to owning VIF * @addr: (link) address used locally * @link_id: link ID, or 0 for non-MLO * @htc_trig_based_pkt_ext: default PE in 4us units, if BSS supports HE @@ -675,6 +676,8 @@ struct ieee80211_fils_discovery { * bandwidth */ struct ieee80211_bss_conf { + struct ieee80211_vif *vif; + const u8 *bssid; unsigned int link_id; u8 addr[ETH_ALEN] __aligned(2); diff --git a/net/mac80211/link.c b/net/mac80211/link.c index 8c8869cc1fb4c..e6f9fce1dadb9 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -34,6 +34,7 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata, link->link_id = link_id; link->conf = link_conf; link_conf->link_id = link_id; + link_conf->vif = &sdata->vif; INIT_WORK(&link->csa_finalize_work, ieee80211_csa_finalize_work); -- GitLab From 586100ad85fed0041e203d6006d45e0ec4c5a0b4 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Wed, 1 Mar 2023 12:09:26 +0200 Subject: [PATCH 0159/2078] wifi: mac80211: remove SMPS from AP debugfs The spatial multiplexing power save feature does not apply to AP mode. Remove it from debugfs in this case. Signed-off-by: Benjamin Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230301115906.01b167027dd5.Iee69f2e4df98581f259ef2c76309b940b20174be@changeid Signed-off-by: Johannes Berg --- net/mac80211/debugfs_netdev.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 0bac9af3ca966..187bb22c0bbd9 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -698,7 +698,6 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata) static void add_ap_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD(num_mcast_sta); - DEBUGFS_ADD_MODE(smps, 0600); DEBUGFS_ADD(num_sta_ps); DEBUGFS_ADD(dtim_count); DEBUGFS_ADD(num_buffered_multicast); -- GitLab From 170cd6a66d9a164180eb4dc72d50afa6ce1ce566 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Wed, 1 Mar 2023 12:09:27 +0200 Subject: [PATCH 0160/2078] wifi: mac80211: add netdev per-link debugfs data and driver hook This adds the infrastructure to have netdev specific per-link data both for mac80211 and the driver in debugfs. For the driver, a new callback is added which is only used if MLO is supported. Signed-off-by: Benjamin Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230301115906.fb4c947e4df8.I69b3516ddf4c8a7501b395f652d6063444ecad63@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 10 ++ net/mac80211/debugfs_netdev.c | 222 +++++++++++++++++++++++++--------- net/mac80211/debugfs_netdev.h | 16 +++ net/mac80211/driver-ops.c | 25 +++- net/mac80211/driver-ops.h | 16 +++ net/mac80211/ieee80211_i.h | 4 + net/mac80211/link.c | 4 + 7 files changed, 242 insertions(+), 55 deletions(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 2a1874d994e0f..5df9eb828a581 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3844,6 +3844,12 @@ struct ieee80211_prep_tx_info { * the station. See @sta_pre_rcu_remove if needed. * This callback can sleep. * + * @link_add_debugfs: Drivers can use this callback to add debugfs files + * when a link is added to a mac80211 vif. This callback should be within + * a CONFIG_MAC80211_DEBUGFS conditional. This callback can sleep. + * For non-MLO the callback will be called once for the default bss_conf + * with the vif's directory rather than a separate subdirectory. 
+ * * @sta_add_debugfs: Drivers can use this callback to add debugfs files * when a station is added to mac80211's station list. This callback * should be within a CONFIG_MAC80211_DEBUGFS conditional. This @@ -4325,6 +4331,10 @@ struct ieee80211_ops { int (*sta_remove)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta); #ifdef CONFIG_MAC80211_DEBUGFS + void (*link_add_debugfs)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_bss_conf *link_conf, + struct dentry *dir); void (*sta_add_debugfs)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 187bb22c0bbd9..b0cef37eb3948 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -23,16 +23,16 @@ #include "driver-ops.h" static ssize_t ieee80211_if_read( - struct ieee80211_sub_if_data *sdata, + void *data, char __user *userbuf, size_t count, loff_t *ppos, - ssize_t (*format)(const struct ieee80211_sub_if_data *, char *, int)) + ssize_t (*format)(const void *, char *, int)) { char buf[200]; ssize_t ret = -EINVAL; read_lock(&dev_base_lock); - ret = (*format)(sdata, buf, sizeof(buf)); + ret = (*format)(data, buf, sizeof(buf)); read_unlock(&dev_base_lock); if (ret >= 0) @@ -42,10 +42,10 @@ static ssize_t ieee80211_if_read( } static ssize_t ieee80211_if_write( - struct ieee80211_sub_if_data *sdata, + void *data, const char __user *userbuf, size_t count, loff_t *ppos, - ssize_t (*write)(struct ieee80211_sub_if_data *, const char *, int)) + ssize_t (*write)(void *, const char *, int)) { char buf[64]; ssize_t ret; @@ -58,64 +58,64 @@ static ssize_t ieee80211_if_write( buf[count] = '\0'; rtnl_lock(); - ret = (*write)(sdata, buf, count); + ret = (*write)(data, buf, count); rtnl_unlock(); return ret; } -#define IEEE80211_IF_FMT(name, field, format_string) \ +#define IEEE80211_IF_FMT(name, type, field, format_string) \ static ssize_t ieee80211_if_fmt_##name( \ - const struct ieee80211_sub_if_data *sdata, char *buf, \ + const type *data, char *buf, \ int buflen) \ { \ - return scnprintf(buf, buflen, format_string, sdata->field); \ + return scnprintf(buf, buflen, format_string, data->field); \ } -#define IEEE80211_IF_FMT_DEC(name, field) \ - IEEE80211_IF_FMT(name, field, "%d\n") -#define IEEE80211_IF_FMT_HEX(name, field) \ - IEEE80211_IF_FMT(name, field, "%#x\n") -#define IEEE80211_IF_FMT_LHEX(name, field) \ - IEEE80211_IF_FMT(name, field, "%#lx\n") +#define IEEE80211_IF_FMT_DEC(name, type, field) \ + IEEE80211_IF_FMT(name, type, field, "%d\n") +#define IEEE80211_IF_FMT_HEX(name, type, field) \ + IEEE80211_IF_FMT(name, type, field, "%#x\n") +#define IEEE80211_IF_FMT_LHEX(name, type, field) \ + IEEE80211_IF_FMT(name, type, field, "%#lx\n") -#define IEEE80211_IF_FMT_HEXARRAY(name, field) \ +#define IEEE80211_IF_FMT_HEXARRAY(name, type, field) \ static ssize_t ieee80211_if_fmt_##name( \ - const struct ieee80211_sub_if_data *sdata, \ + const type *data, \ char *buf, int buflen) \ { \ char *p = buf; \ int i; \ - for (i = 0; i < sizeof(sdata->field); i++) { \ + for (i = 0; i < sizeof(data->field); i++) { \ p += scnprintf(p, buflen + buf - p, "%.2x ", \ - sdata->field[i]); \ + data->field[i]); \ } \ p += scnprintf(p, buflen + buf - p, "\n"); \ return p - buf; \ } -#define IEEE80211_IF_FMT_ATOMIC(name, field) \ +#define IEEE80211_IF_FMT_ATOMIC(name, type, field) \ static ssize_t ieee80211_if_fmt_##name( \ - const struct ieee80211_sub_if_data *sdata, \ + const type *data, \ char *buf, 
int buflen) \ { \ - return scnprintf(buf, buflen, "%d\n", atomic_read(&sdata->field));\ + return scnprintf(buf, buflen, "%d\n", atomic_read(&data->field));\ } -#define IEEE80211_IF_FMT_MAC(name, field) \ +#define IEEE80211_IF_FMT_MAC(name, type, field) \ static ssize_t ieee80211_if_fmt_##name( \ - const struct ieee80211_sub_if_data *sdata, char *buf, \ + const type *data, char *buf, \ int buflen) \ { \ - return scnprintf(buf, buflen, "%pM\n", sdata->field); \ + return scnprintf(buf, buflen, "%pM\n", data->field); \ } -#define IEEE80211_IF_FMT_JIFFIES_TO_MS(name, field) \ +#define IEEE80211_IF_FMT_JIFFIES_TO_MS(name, type, field) \ static ssize_t ieee80211_if_fmt_##name( \ - const struct ieee80211_sub_if_data *sdata, \ + const type *data, \ char *buf, int buflen) \ { \ return scnprintf(buf, buflen, "%d\n", \ - jiffies_to_msecs(sdata->field)); \ + jiffies_to_msecs(data->field)); \ } #define _IEEE80211_IF_FILE_OPS(name, _read, _write) \ @@ -126,43 +126,67 @@ static const struct file_operations name##_ops = { \ .llseek = generic_file_llseek, \ } -#define _IEEE80211_IF_FILE_R_FN(name) \ +#define _IEEE80211_IF_FILE_R_FN(name, type) \ static ssize_t ieee80211_if_read_##name(struct file *file, \ char __user *userbuf, \ size_t count, loff_t *ppos) \ { \ + ssize_t (*fn)(const void *, char *, int) = (void *) \ + ((ssize_t (*)(const type, char *, int)) \ + ieee80211_if_fmt_##name); \ return ieee80211_if_read(file->private_data, \ - userbuf, count, ppos, \ - ieee80211_if_fmt_##name); \ + userbuf, count, ppos, fn); \ } -#define _IEEE80211_IF_FILE_W_FN(name) \ +#define _IEEE80211_IF_FILE_W_FN(name, type) \ static ssize_t ieee80211_if_write_##name(struct file *file, \ const char __user *userbuf, \ size_t count, loff_t *ppos) \ { \ + ssize_t (*fn)(void *, const char *, int) = (void *) \ + ((ssize_t (*)(type, const char *, int)) \ + ieee80211_if_parse_##name); \ return ieee80211_if_write(file->private_data, userbuf, count, \ - ppos, ieee80211_if_parse_##name); \ + ppos, fn); \ } #define IEEE80211_IF_FILE_R(name) \ - _IEEE80211_IF_FILE_R_FN(name) \ + _IEEE80211_IF_FILE_R_FN(name, struct ieee80211_sub_if_data *) \ _IEEE80211_IF_FILE_OPS(name, ieee80211_if_read_##name, NULL) #define IEEE80211_IF_FILE_W(name) \ - _IEEE80211_IF_FILE_W_FN(name) \ + _IEEE80211_IF_FILE_W_FN(name, struct ieee80211_sub_if_data *) \ _IEEE80211_IF_FILE_OPS(name, NULL, ieee80211_if_write_##name) #define IEEE80211_IF_FILE_RW(name) \ - _IEEE80211_IF_FILE_R_FN(name) \ - _IEEE80211_IF_FILE_W_FN(name) \ + _IEEE80211_IF_FILE_R_FN(name, struct ieee80211_sub_if_data *) \ + _IEEE80211_IF_FILE_W_FN(name, struct ieee80211_sub_if_data *) \ _IEEE80211_IF_FILE_OPS(name, ieee80211_if_read_##name, \ ieee80211_if_write_##name) #define IEEE80211_IF_FILE(name, field, format) \ - IEEE80211_IF_FMT_##format(name, field) \ + IEEE80211_IF_FMT_##format(name, struct ieee80211_sub_if_data, field) \ IEEE80211_IF_FILE_R(name) +/* Same but with a link_ prefix in the ops variable name and different type */ +#define IEEE80211_IF_LINK_FILE_R(name) \ + _IEEE80211_IF_FILE_R_FN(name, struct ieee80211_link_data *) \ + _IEEE80211_IF_FILE_OPS(link_##name, ieee80211_if_read_##name, NULL) + +#define IEEE80211_IF_LINK_FILE_W(name) \ + _IEEE80211_IF_FILE_W_FN(name) \ + _IEEE80211_IF_FILE_OPS(link_##name, NULL, ieee80211_if_write_##name) + +#define IEEE80211_IF_LINK_FILE_RW(name) \ + _IEEE80211_IF_FILE_R_FN(name, struct ieee80211_link_data *) \ + _IEEE80211_IF_FILE_W_FN(name, struct ieee80211_link_data *) \ + _IEEE80211_IF_FILE_OPS(link_##name, ieee80211_if_read_##name, \ + 
ieee80211_if_write_##name) + +#define IEEE80211_IF_LINK_FILE(name, field, format) \ + IEEE80211_IF_FMT_##format(name, struct ieee80211_link_data, field) \ + IEEE80211_IF_LINK_FILE_R(name) + /* common attributes */ IEEE80211_IF_FILE(rc_rateidx_mask_2ghz, rc_rateidx_mask[NL80211_BAND_2GHZ], HEX); @@ -207,9 +231,9 @@ IEEE80211_IF_FILE_R(rc_rateidx_vht_mcs_mask_5ghz); IEEE80211_IF_FILE(flags, flags, HEX); IEEE80211_IF_FILE(state, state, LHEX); -IEEE80211_IF_FILE(txpower, vif.bss_conf.txpower, DEC); -IEEE80211_IF_FILE(ap_power_level, deflink.ap_power_level, DEC); -IEEE80211_IF_FILE(user_power_level, deflink.user_power_level, DEC); +IEEE80211_IF_LINK_FILE(txpower, conf->txpower, DEC); +IEEE80211_IF_LINK_FILE(ap_power_level, ap_power_level, DEC); +IEEE80211_IF_LINK_FILE(user_power_level, user_power_level, DEC); static ssize_t ieee80211_if_fmt_hw_queues(const struct ieee80211_sub_if_data *sdata, @@ -236,9 +260,10 @@ IEEE80211_IF_FILE(bssid, deflink.u.mgd.bssid, MAC); IEEE80211_IF_FILE(aid, vif.cfg.aid, DEC); IEEE80211_IF_FILE(beacon_timeout, u.mgd.beacon_timeout, JIFFIES_TO_MS); -static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata, +static int ieee80211_set_smps(struct ieee80211_link_data *link, enum ieee80211_smps_mode smps_mode) { + struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_local *local = sdata->local; int err; @@ -256,7 +281,7 @@ static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata, return -EOPNOTSUPP; sdata_lock(sdata); - err = __ieee80211_request_smps_mgd(sdata, &sdata->deflink, smps_mode); + err = __ieee80211_request_smps_mgd(link->sdata, link, smps_mode); sdata_unlock(sdata); return err; @@ -269,24 +294,24 @@ static const char *smps_modes[IEEE80211_SMPS_NUM_MODES] = { [IEEE80211_SMPS_DYNAMIC] = "dynamic", }; -static ssize_t ieee80211_if_fmt_smps(const struct ieee80211_sub_if_data *sdata, +static ssize_t ieee80211_if_fmt_smps(const struct ieee80211_link_data *link, char *buf, int buflen) { - if (sdata->vif.type == NL80211_IFTYPE_STATION) + if (link->sdata->vif.type == NL80211_IFTYPE_STATION) return snprintf(buf, buflen, "request: %s\nused: %s\n", - smps_modes[sdata->deflink.u.mgd.req_smps], - smps_modes[sdata->deflink.smps_mode]); + smps_modes[link->u.mgd.req_smps], + smps_modes[link->smps_mode]); return -EINVAL; } -static ssize_t ieee80211_if_parse_smps(struct ieee80211_sub_if_data *sdata, +static ssize_t ieee80211_if_parse_smps(struct ieee80211_link_data *link, const char *buf, int buflen) { enum ieee80211_smps_mode mode; for (mode = 0; mode < IEEE80211_SMPS_NUM_MODES; mode++) { if (strncmp(buf, smps_modes[mode], buflen) == 0) { - int err = ieee80211_set_smps(sdata, mode); + int err = ieee80211_set_smps(link, mode); if (!err) return buflen; return err; @@ -295,7 +320,7 @@ static ssize_t ieee80211_if_parse_smps(struct ieee80211_sub_if_data *sdata, return -EINVAL; } -IEEE80211_IF_FILE_RW(smps); +IEEE80211_IF_LINK_FILE_RW(smps); static ssize_t ieee80211_if_parse_tkip_mic_test( struct ieee80211_sub_if_data *sdata, const char *buf, int buflen) @@ -595,6 +620,8 @@ static ssize_t ieee80211_if_parse_active_links(struct ieee80211_sub_if_data *sda } IEEE80211_IF_FILE_RW(active_links); +IEEE80211_IF_LINK_FILE(addr, conf->addr, MAC); + #ifdef CONFIG_MAC80211_MESH IEEE80211_IF_FILE(estab_plinks, u.mesh.estab_plinks, ATOMIC); @@ -685,7 +712,6 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD(bssid); DEBUGFS_ADD(aid); DEBUGFS_ADD(beacon_timeout); - DEBUGFS_ADD_MODE(smps, 0600); DEBUGFS_ADD_MODE(tkip_mic_test, 0200); 
DEBUGFS_ADD_MODE(beacon_loss, 0200); DEBUGFS_ADD_MODE(uapsd_queues, 0600); @@ -788,9 +814,6 @@ static void add_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD(flags); DEBUGFS_ADD(state); - DEBUGFS_ADD(txpower); - DEBUGFS_ADD(user_power_level); - DEBUGFS_ADD(ap_power_level); if (sdata->vif.type != NL80211_IFTYPE_MONITOR) add_common_files(sdata); @@ -820,6 +843,31 @@ static void add_files(struct ieee80211_sub_if_data *sdata) } } +#undef DEBUGFS_ADD_MODE +#undef DEBUGFS_ADD + +#define DEBUGFS_ADD_MODE(dentry, name, mode) \ + debugfs_create_file(#name, mode, dentry, \ + link, &link_##name##_ops) + +#define DEBUGFS_ADD(dentry, name) DEBUGFS_ADD_MODE(dentry, name, 0400) + +static void add_link_files(struct ieee80211_link_data *link, + struct dentry *dentry) +{ + DEBUGFS_ADD(dentry, txpower); + DEBUGFS_ADD(dentry, user_power_level); + DEBUGFS_ADD(dentry, ap_power_level); + + switch (link->sdata->vif.type) { + case NL80211_IFTYPE_STATION: + DEBUGFS_ADD_MODE(dentry, smps, 0600); + break; + default: + break; + } +} + void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata) { char buf[10+IFNAMSIZ]; @@ -830,6 +878,9 @@ void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata) sdata->debugfs.subdir_stations = debugfs_create_dir("stations", sdata->vif.debugfs_dir); add_files(sdata); + + if (!(sdata->local->hw.wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO)) + add_link_files(&sdata->deflink, sdata->vif.debugfs_dir); } void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata) @@ -855,3 +906,66 @@ void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata) sprintf(buf, "netdev:%s", sdata->name); debugfs_rename(dir->d_parent, dir, dir->d_parent, buf); } + +void ieee80211_link_debugfs_add(struct ieee80211_link_data *link) +{ + char link_dir_name[10]; + + if (WARN_ON(!link->sdata->vif.debugfs_dir)) + return; + + /* For now, this should not be called for non-MLO capable drivers */ + if (WARN_ON(!(link->sdata->local->hw.wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO))) + return; + + snprintf(link_dir_name, sizeof(link_dir_name), + "link-%d", link->link_id); + + link->debugfs_dir = + debugfs_create_dir(link_dir_name, + link->sdata->vif.debugfs_dir); + + DEBUGFS_ADD(link->debugfs_dir, addr); + add_link_files(link, link->debugfs_dir); +} + +void ieee80211_link_debugfs_remove(struct ieee80211_link_data *link) +{ + if (!link->sdata->vif.debugfs_dir || !link->debugfs_dir) { + link->debugfs_dir = NULL; + return; + } + + if (link->debugfs_dir == link->sdata->vif.debugfs_dir) { + WARN_ON(link != &link->sdata->deflink); + link->debugfs_dir = NULL; + return; + } + + debugfs_remove_recursive(link->debugfs_dir); + link->debugfs_dir = NULL; +} + +void ieee80211_link_debugfs_drv_add(struct ieee80211_link_data *link) +{ + if (WARN_ON(!link->debugfs_dir)) + return; + + drv_link_add_debugfs(link->sdata->local, link->sdata, + link->conf, link->debugfs_dir); +} + +void ieee80211_link_debugfs_drv_remove(struct ieee80211_link_data *link) +{ + if (!link || !link->debugfs_dir) + return; + + if (WARN_ON(link->debugfs_dir == link->sdata->vif.debugfs_dir)) + return; + + /* Recreate the directory excluding the driver data */ + debugfs_remove_recursive(link->debugfs_dir); + link->debugfs_dir = NULL; + + ieee80211_link_debugfs_add(link); +} diff --git a/net/mac80211/debugfs_netdev.h b/net/mac80211/debugfs_netdev.h index a7e9d8d518f97..99e688dcabd69 100644 --- a/net/mac80211/debugfs_netdev.h +++ b/net/mac80211/debugfs_netdev.h @@ -10,6 +10,12 @@ void ieee80211_debugfs_add_netdev(struct 
ieee80211_sub_if_data *sdata); void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata); void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata); + +void ieee80211_link_debugfs_add(struct ieee80211_link_data *link); +void ieee80211_link_debugfs_remove(struct ieee80211_link_data *link); + +void ieee80211_link_debugfs_drv_add(struct ieee80211_link_data *link); +void ieee80211_link_debugfs_drv_remove(struct ieee80211_link_data *link); #else static inline void ieee80211_debugfs_add_netdev( struct ieee80211_sub_if_data *sdata) @@ -20,6 +26,16 @@ static inline void ieee80211_debugfs_remove_netdev( static inline void ieee80211_debugfs_rename_netdev( struct ieee80211_sub_if_data *sdata) {} + +static inline void ieee80211_link_debugfs_add(struct ieee80211_link_data *link) +{} +static inline void ieee80211_link_debugfs_remove(struct ieee80211_link_data *link) +{} + +static inline void ieee80211_link_debugfs_drv_add(struct ieee80211_link_data *link) +{} +static inline void ieee80211_link_debugfs_drv_remove(struct ieee80211_link_data *link) +{} #endif #endif /* __IEEE80211_DEBUGFS_NETDEV_H */ diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index cfb09e4aed4d3..30cd0c905a24f 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -8,6 +8,7 @@ #include "trace.h" #include "driver-ops.h" #include "debugfs_sta.h" +#include "debugfs_netdev.h" int drv_start(struct ieee80211_local *local) { @@ -477,6 +478,10 @@ int drv_change_vif_links(struct ieee80211_local *local, u16 old_links, u16 new_links, struct ieee80211_bss_conf *old[IEEE80211_MLD_MAX_NUM_LINKS]) { + struct ieee80211_link_data *link; + unsigned long links_to_add; + unsigned long links_to_rem; + unsigned int link_id; int ret = -EOPNOTSUPP; might_sleep(); @@ -487,13 +492,31 @@ int drv_change_vif_links(struct ieee80211_local *local, if (old_links == new_links) return 0; + links_to_add = ~old_links & new_links; + links_to_rem = old_links & ~new_links; + + for_each_set_bit(link_id, &links_to_rem, IEEE80211_MLD_MAX_NUM_LINKS) { + link = rcu_access_pointer(sdata->link[link_id]); + + ieee80211_link_debugfs_drv_remove(link); + } + trace_drv_change_vif_links(local, sdata, old_links, new_links); if (local->ops->change_vif_links) ret = local->ops->change_vif_links(&local->hw, &sdata->vif, old_links, new_links, old); trace_drv_return_int(local, ret); - return ret; + if (ret) + return ret; + + for_each_set_bit(link_id, &links_to_add, IEEE80211_MLD_MAX_NUM_LINKS) { + link = rcu_access_pointer(sdata->link[link_id]); + + ieee80211_link_debugfs_drv_add(link); + } + + return 0; } int drv_change_sta_links(struct ieee80211_local *local, diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 5d13a3dfd3664..a68d606e69872 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -465,6 +465,22 @@ static inline void drv_sta_remove(struct ieee80211_local *local, } #ifdef CONFIG_MAC80211_DEBUGFS +static inline void drv_link_add_debugfs(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_bss_conf *link_conf, + struct dentry *dir) +{ + might_sleep(); + + sdata = get_bss_sdata(sdata); + if (!check_sdata_in_driver(sdata)) + return; + + if (local->ops->link_add_debugfs) + local->ops->link_add_debugfs(&local->hw, &sdata->vif, + link_conf, dir); +} + static inline void drv_sta_add_debugfs(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta, diff --git a/net/mac80211/ieee80211_i.h 
b/net/mac80211/ieee80211_i.h index ecc232eb1ee82..3d4edc25a69e0 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -999,6 +999,10 @@ struct ieee80211_link_data { struct ieee80211_tx_queue_params tx_conf[IEEE80211_NUM_ACS]; struct ieee80211_bss_conf *conf; + +#ifdef CONFIG_MAC80211_DEBUGFS + struct dentry *debugfs_dir; +#endif }; struct ieee80211_sub_if_data { diff --git a/net/mac80211/link.c b/net/mac80211/link.c index e6f9fce1dadb9..e82db88a47f8f 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -10,6 +10,7 @@ #include "ieee80211_i.h" #include "driver-ops.h" #include "key.h" +#include "debugfs_netdev.h" void ieee80211_link_setup(struct ieee80211_link_data *link) { @@ -61,6 +62,8 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata, default: WARN_ON(1); } + + ieee80211_link_debugfs_add(link); } } @@ -94,6 +97,7 @@ static void ieee80211_tear_down_links(struct ieee80211_sub_if_data *sdata, if (WARN_ON(!link)) continue; ieee80211_remove_link_keys(link, &keys); + ieee80211_link_debugfs_remove(link); ieee80211_link_stop(link); } -- GitLab From 5cf10940a47985260ebbdc4665a8081c6b575e17 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Wed, 1 Mar 2023 12:09:28 +0200 Subject: [PATCH 0161/2078] wifi: mac80211_hwsim: Indicate support for NL80211_EXT_FEATURE_SCAN_MIN_PREQ_CONTENT mac80211_hwsim always uses the probe request coming from mac80211, which has support for minimal probe request content. Signed-off-by: Ilan Peer Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230301115906.67d31166ffd1.I908b68c5127525da3388c0d61a3d5d221ba451f6@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 4cc4eaf80b146..e9b9340a97dcc 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -4446,6 +4446,9 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, wiphy_ext_feature_set(hw->wiphy, NL80211_EXT_FEATURE_BEACON_RATE_LEGACY); + wiphy_ext_feature_set(hw->wiphy, + NL80211_EXT_FEATURE_SCAN_MIN_PREQ_CONTENT); + hw->wiphy->interface_modes = param->iftypes; /* ask mac80211 to reserve space for magic */ -- GitLab From 15f9b3ef5190bacc8a39628bc57c6da085877ffe Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Mar 2023 12:09:30 +0200 Subject: [PATCH 0162/2078] wifi: mac80211: mlme: remove pointless sta check We already exited the function if sta ended up NULL, so just remove the extra check. 
Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230301115906.4cbac9cfd03a.I21ec81c96d246afdabc2b0807d3856e6b1182cb7@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index cdf842c0849b1..78adbacf85383 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -5893,7 +5893,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, goto free; } - if (sta && elems->opmode_notif) + if (elems->opmode_notif) ieee80211_vht_handle_opmode(sdata, link_sta, *elems->opmode_notif, rx_status->band); -- GitLab From e8edb34640eeeefc74a3d767b61b4d4ac3b94eea Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Mar 2023 12:09:31 +0200 Subject: [PATCH 0163/2078] wifi: mac80211: simplify reasoning about EHT capa handling Given the code in cfg80211, EHT capa cannot be non-NULL when HE capa is NULL, but it's easier to reason about it if both are checked and the compiler will likely integrate the check with the previous one for HE capa anyway. Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230301115906.7413d50d23bc.I6fef7484721be9bd5364f64921fc5e9168495f62@changeid Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 7e90f4a81962c..2c8fa19c2f248 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1788,7 +1788,7 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, (void *)params->he_6ghz_capa, link_sta); - if (params->eht_capa) + if (params->he_capa && params->eht_capa) ieee80211_eht_cap_ie_to_sta_eht_cap(sdata, sband, (u8 *)params->he_capa, params->he_capa_len, -- GitLab From e820373a4fd06ebb6ed0331e97693048cfd16652 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Mar 2023 12:09:32 +0200 Subject: [PATCH 0164/2078] wifi: mac80211: fix ieee80211_link_set_associated() type The return type here should be u64 for the flags, even if it doesn't matter right now because it doesn't return any flags that don't fit into u32. Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230301115906.d67ccae57d60.Ia4768e547ba8b1deb2b84ce3bbfbe216d5bfff6a@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 78adbacf85383..e13a0354c3970 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2744,7 +2744,7 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_link_data *link, return changed; } -static u32 ieee80211_link_set_associated(struct ieee80211_link_data *link, +static u64 ieee80211_link_set_associated(struct ieee80211_link_data *link, struct cfg80211_bss *cbss) { struct ieee80211_sub_if_data *sdata = link->sdata; -- GitLab From 18cbf7c089ba70fefe1b4c01af28753cabfbf38f Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Wed, 1 Mar 2023 12:09:34 +0200 Subject: [PATCH 0165/2078] wifi: radiotap: Add EHT radiotap definitions This is based on https://www.radiotap.org/fields/EHT.html and https://www.radiotap.org/fields/U-SIG.html new EHT TLV definition for 11be standard. 
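For orientation, a minimal user-space sketch of how a capture tool might walk the radiotap TLV area that follows the fixed radiotap fields once the IEEE80211_RADIOTAP_TLV presence bit is set. Only the TLV layout and the type values 33 (U-SIG) and 34 (EHT) come from the definitions added in the diff below; the helper walk_radiotap_tlvs(), the local struct radiotap_tlv and the two #defines are made up for illustration.

#include <stdint.h>
#include <stddef.h>
#include <endian.h>

struct radiotap_tlv {			/* mirrors struct ieee80211_radiotap_tlv */
	uint16_t type;
	uint16_t len;
	uint8_t data[];
} __attribute__((packed));

#define RADIOTAP_TLV_EHT_USIG	33
#define RADIOTAP_TLV_EHT	34

static unsigned int walk_radiotap_tlvs(const uint8_t *tlv_area, size_t tlv_space)
{
	unsigned int n_eht_tlvs = 0;
	size_t offset = 0;

	while (offset + sizeof(struct radiotap_tlv) <= tlv_space) {
		const struct radiotap_tlv *tlv =
			(const void *)(tlv_area + offset);
		uint16_t type = le16toh(tlv->type);
		uint16_t len = le16toh(tlv->len);

		if (offset + sizeof(*tlv) + len > tlv_space)
			break;				/* truncated capture */

		/* type 33 carries the 12-byte U-SIG common/value/mask triplet,
		 * type 34 carries known + data[9] + user_info[] words
		 */
		if (type == RADIOTAP_TLV_EHT_USIG || type == RADIOTAP_TLV_EHT)
			n_eht_tlvs++;

		/* each TLV is padded so the next one starts 4-byte aligned */
		offset += sizeof(*tlv) + ((len + 3u) & ~3u);
	}

	return n_eht_tlvs;
}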
Signed-off-by: Mordechay Goodstein Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230301115906.254b19fffe41.I4ce78e2c558da6e5a708a8d68d61b5d7b3eb3746@changeid Signed-off-by: Johannes Berg --- include/net/ieee80211_radiotap.h | 187 ++++++++++++++++++++++++++++++- 1 file changed, 185 insertions(+), 2 deletions(-) diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index 598f53d2a3a0b..0fc2667a9a5d2 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -1,6 +1,6 @@ /* * Copyright (c) 2017 Intel Deutschland GmbH - * Copyright (c) 2018-2019, 2021 Intel Corporation + * Copyright (c) 2018-2019, 2021-2022 Intel Corporation * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -82,11 +82,14 @@ enum ieee80211_radiotap_presence { IEEE80211_RADIOTAP_HE_MU = 24, IEEE80211_RADIOTAP_ZERO_LEN_PSDU = 26, IEEE80211_RADIOTAP_LSIG = 27, + IEEE80211_RADIOTAP_TLV = 28, /* valid in every it_present bitmap, even vendor namespaces */ IEEE80211_RADIOTAP_RADIOTAP_NAMESPACE = 29, IEEE80211_RADIOTAP_VENDOR_NAMESPACE = 30, - IEEE80211_RADIOTAP_EXT = 31 + IEEE80211_RADIOTAP_EXT = 31, + IEEE80211_RADIOTAP_EHT_USIG = 33, + IEEE80211_RADIOTAP_EHT = 34, }; /* for IEEE80211_RADIOTAP_FLAGS */ @@ -360,6 +363,186 @@ enum ieee80211_radiotap_zero_len_psdu_type { IEEE80211_RADIOTAP_ZERO_LEN_PSDU_VENDOR = 0xff, }; +struct ieee80211_radiotap_tlv { + __le16 type; + __le16 len; + u8 data[]; +} __packed; + +/* ieee80211_radiotap_eht_usig - content of U-SIG tlv (type 33) + * see www.radiotap.org/fields/U-SIG.html for details + */ +struct ieee80211_radiotap_eht_usig { + __le32 common; + __le32 value; + __le32 mask; +} __packed; + +/* ieee80211_radiotap_eht - content of EHT tlv (type 34) + * see www.radiotap.org/fields/EHT.html for details + */ +struct ieee80211_radiotap_eht { + __le32 known; + __le32 data[9]; + __le32 user_info[]; +} __packed; + +/* Known field for EHT TLV + * The ending defines for what the field applies as following + * O - OFDMA (including TB), M - MU-MIMO, S - EHT sounding. 
+ */ +enum ieee80211_radiotap_eht_known { + IEEE80211_RADIOTAP_EHT_KNOWN_SPATIAL_REUSE = 0x00000002, + IEEE80211_RADIOTAP_EHT_KNOWN_GI = 0x00000004, + IEEE80211_RADIOTAP_EHT_KNOWN_EHT_LTF = 0x00000010, + IEEE80211_RADIOTAP_EHT_KNOWN_LDPC_EXTRA_SYM_OM = 0x00000020, + IEEE80211_RADIOTAP_EHT_KNOWN_PRE_PADD_FACOR_OM = 0x00000040, + IEEE80211_RADIOTAP_EHT_KNOWN_PE_DISAMBIGUITY_OM = 0x00000080, + IEEE80211_RADIOTAP_EHT_KNOWN_DISREGARD_O = 0x00000100, + IEEE80211_RADIOTAP_EHT_KNOWN_DISREGARD_S = 0x00000200, + IEEE80211_RADIOTAP_EHT_KNOWN_CRC1 = 0x00002000, + IEEE80211_RADIOTAP_EHT_KNOWN_TAIL1 = 0x00004000, + IEEE80211_RADIOTAP_EHT_KNOWN_CRC2_O = 0x00008000, + IEEE80211_RADIOTAP_EHT_KNOWN_TAIL2_O = 0x00010000, + IEEE80211_RADIOTAP_EHT_KNOWN_NSS_S = 0x00020000, + IEEE80211_RADIOTAP_EHT_KNOWN_BEAMFORMED_S = 0x00040000, + IEEE80211_RADIOTAP_EHT_KNOWN_NR_NON_OFDMA_USERS_M = 0x00080000, + IEEE80211_RADIOTAP_EHT_KNOWN_ENCODING_BLOCK_CRC_M = 0x00100000, + IEEE80211_RADIOTAP_EHT_KNOWN_ENCODING_BLOCK_TAIL_M = 0x00200000, + IEEE80211_RADIOTAP_EHT_KNOWN_RU_MRU_SIZE_OM = 0x00400000, + IEEE80211_RADIOTAP_EHT_KNOWN_RU_MRU_INDEX_OM = 0x00800000, + IEEE80211_RADIOTAP_EHT_KNOWN_RU_ALLOC_TB_FMT = 0x01000000, + IEEE80211_RADIOTAP_EHT_KNOWN_PRIMARY_80 = 0x02000000, +}; + +enum ieee80211_radiotap_eht_data { + /* Data 0 */ + IEEE80211_RADIOTAP_EHT_DATA0_SPATIAL_REUSE = 0x00000078, + IEEE80211_RADIOTAP_EHT_DATA0_GI = 0x00000180, + IEEE80211_RADIOTAP_EHT_DATA0_LTF = 0x00000600, + IEEE80211_RADIOTAP_EHT_DATA0_EHT_LTF = 0x00003800, + IEEE80211_RADIOTAP_EHT_DATA0_LDPC_EXTRA_SYM_OM = 0x00004000, + IEEE80211_RADIOTAP_EHT_DATA0_PRE_PADD_FACOR_OM = 0x00018000, + IEEE80211_RADIOTAP_EHT_DATA0_PE_DISAMBIGUITY_OM = 0x00020000, + IEEE80211_RADIOTAP_EHT_DATA0_DISREGARD_S = 0x000c0000, + IEEE80211_RADIOTAP_EHT_DATA0_DISREGARD_O = 0x003c0000, + IEEE80211_RADIOTAP_EHT_DATA0_CRC1_O = 0x03c00000, + IEEE80211_RADIOTAP_EHT_DATA0_TAIL1_O = 0xfc000000, + /* Data 1 */ + IEEE80211_RADIOTAP_EHT_DATA1_RU_SIZE = 0x0000001f, + IEEE80211_RADIOTAP_EHT_DATA1_RU_INDEX = 0x00001fe0, + IEEE80211_RADIOTAP_EHT_DATA1_RU_ALLOC_CC_1_1_1 = 0x003fe000, + IEEE80211_RADIOTAP_EHT_DATA1_RU_ALLOC_CC_1_1_1_KNOWN = 0x00400000, + IEEE80211_RADIOTAP_EHT_DATA1_PRIMARY_80 = 0xc0000000, + /* Data 2 */ + IEEE80211_RADIOTAP_EHT_DATA2_RU_ALLOC_CC_2_1_1 = 0x000001ff, + IEEE80211_RADIOTAP_EHT_DATA2_RU_ALLOC_CC_2_1_1_KNOWN = 0x00000200, + IEEE80211_RADIOTAP_EHT_DATA2_RU_ALLOC_CC_1_1_2 = 0x0007fc00, + IEEE80211_RADIOTAP_EHT_DATA2_RU_ALLOC_CC_1_1_2_KNOWN = 0x00080000, + IEEE80211_RADIOTAP_EHT_DATA2_RU_ALLOC_CC_2_1_2 = 0x1ff00000, + IEEE80211_RADIOTAP_EHT_DATA2_RU_ALLOC_CC_2_1_2_KNOWN = 0x20000000, + /* Data 3 */ + IEEE80211_RADIOTAP_EHT_DATA3_RU_ALLOC_CC_1_2_1 = 0x000001ff, + IEEE80211_RADIOTAP_EHT_DATA3_RU_ALLOC_CC_1_2_1_KNOWN = 0x00000200, + IEEE80211_RADIOTAP_EHT_DATA3_RU_ALLOC_CC_2_2_1 = 0x0007fc00, + IEEE80211_RADIOTAP_EHT_DATA3_RU_ALLOC_CC_2_2_1_KNOWN = 0x00080000, + IEEE80211_RADIOTAP_EHT_DATA3_RU_ALLOC_CC_1_2_2 = 0x1ff00000, + IEEE80211_RADIOTAP_EHT_DATA3_RU_ALLOC_CC_1_2_2_KNOWN = 0x20000000, + /* Data 4 */ + IEEE80211_RADIOTAP_EHT_DATA4_RU_ALLOC_CC_2_2_2 = 0x000001ff, + IEEE80211_RADIOTAP_EHT_DATA4_RU_ALLOC_CC_2_2_2_KNOWN = 0x00000200, + IEEE80211_RADIOTAP_EHT_DATA4_RU_ALLOC_CC_1_2_3 = 0x0007fc00, + IEEE80211_RADIOTAP_EHT_DATA4_RU_ALLOC_CC_1_2_3_KNOWN = 0x00080000, + IEEE80211_RADIOTAP_EHT_DATA4_RU_ALLOC_CC_2_2_3 = 0x1ff00000, + IEEE80211_RADIOTAP_EHT_DATA4_RU_ALLOC_CC_2_2_3_KNOWN = 0x20000000, + /* Data 5 */ + IEEE80211_RADIOTAP_EHT_DATA5_RU_ALLOC_CC_1_2_4 = 0x000001ff, 
+ IEEE80211_RADIOTAP_EHT_DATA5_RU_ALLOC_CC_1_2_4_KNOWN = 0x00000200, + IEEE80211_RADIOTAP_EHT_DATA5_RU_ALLOC_CC_2_2_4 = 0x0007fc00, + IEEE80211_RADIOTAP_EHT_DATA5_RU_ALLOC_CC_2_2_4_KNOWN = 0x00080000, + IEEE80211_RADIOTAP_EHT_DATA5_RU_ALLOC_CC_1_2_5 = 0x1ff00000, + IEEE80211_RADIOTAP_EHT_DATA5_RU_ALLOC_CC_1_2_5_KNOWN = 0x20000000, + /* Data 6 */ + IEEE80211_RADIOTAP_EHT_DATA6_RU_ALLOC_CC_2_2_5 = 0x000001ff, + IEEE80211_RADIOTAP_EHT_DATA6_RU_ALLOC_CC_2_2_5_KNOWN = 0x00000200, + IEEE80211_RADIOTAP_EHT_DATA6_RU_ALLOC_CC_1_2_6 = 0x0007fc00, + IEEE80211_RADIOTAP_EHT_DATA6_RU_ALLOC_CC_1_2_6_KNOWN = 0x00080000, + IEEE80211_RADIOTAP_EHT_DATA6_RU_ALLOC_CC_2_2_6 = 0x1ff00000, + IEEE80211_RADIOTAP_EHT_DATA6_RU_ALLOC_CC_2_2_6_KNOWN = 0x20000000, + /* Data 7 */ + IEEE80211_RADIOTAP_EHT_DATA7_CRC2_O = 0x0000000f, + IEEE80211_RADIOTAP_EHT_DATA7_TAIL_2_O = 0x000003f0, + IEEE80211_RADIOTAP_EHT_DATA7_NSS_S = 0x0000f000, + IEEE80211_RADIOTAP_EHT_DATA7_BEAMFORMED_S = 0x00010000, + IEEE80211_RADIOTAP_EHT_DATA7_NUM_OF_NON_OFDMA_USERS = 0x000e0000, + IEEE80211_RADIOTAP_EHT_DATA7_USER_ENCODING_BLOCK_CRC = 0x00f00000, + IEEE80211_RADIOTAP_EHT_DATA7_USER_ENCODING_BLOCK_TAIL = 0x3f000000, + /* Data 8 */ + IEEE80211_RADIOTAP_EHT_DATA8_RU_ALLOC_TB_FMT_PS_160 = 0x00000001, + IEEE80211_RADIOTAP_EHT_DATA8_RU_ALLOC_TB_FMT_B0 = 0x00000002, + IEEE80211_RADIOTAP_EHT_DATA8_RU_ALLOC_TB_FMT_B7_B1 = 0x000001fc, +}; + +enum ieee80211_radiotap_eht_user_info { + IEEE80211_RADIOTAP_EHT_USER_INFO_STA_ID_KNOWN = 0x00000001, + IEEE80211_RADIOTAP_EHT_USER_INFO_MCS_KNOWN = 0x00000002, + IEEE80211_RADIOTAP_EHT_USER_INFO_CODING_KNOWN = 0x00000004, + IEEE80211_RADIOTAP_EHT_USER_INFO_NSS_KNOWN_O = 0x00000010, + IEEE80211_RADIOTAP_EHT_USER_INFO_BEAMFORMING_KNOWN_O = 0x00000020, + IEEE80211_RADIOTAP_EHT_USER_INFO_SPATIAL_CONFIG_KNOWN_M = 0x00000040, + IEEE80211_RADIOTAP_EHT_USER_INFO_DATA_FOR_USER = 0x00000080, + IEEE80211_RADIOTAP_EHT_USER_INFO_STA_ID = 0x0007ff00, + IEEE80211_RADIOTAP_EHT_USER_INFO_CODING = 0x00080000, + IEEE80211_RADIOTAP_EHT_USER_INFO_MCS = 0x00f00000, + IEEE80211_RADIOTAP_EHT_USER_INFO_NSS_O = 0x0f000000, + IEEE80211_RADIOTAP_EHT_USER_INFO_BEAMFORMING_O = 0x20000000, + IEEE80211_RADIOTAP_EHT_USER_INFO_SPATIAL_CONFIG_M = 0x3f000000, + IEEE80211_RADIOTAP_EHT_USER_INFO_RESEVED_c0000000 = 0xc0000000, +}; + +enum ieee80211_radiotap_eht_usig_common { + IEEE80211_RADIOTAP_EHT_USIG_COMMON_PHY_VER_KNOWN = 0x00000001, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW_KNOWN = 0x00000002, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_UL_DL_KNOWN = 0x00000004, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_BSS_COLOR_KNOWN = 0x00000008, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_TXOP_KNOWN = 0x00000010, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_BAD_USIG_CRC = 0x00000020, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_PHY_VER = 0x00007000, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW = 0x00038000, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_UL_DL = 0x00040000, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_BSS_COLOR = 0x01f80000, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_TXOP = 0xfe000000, +}; + +enum ieee80211_radiotap_eht_usig_mu { + /* MU-USIG-1 */ + IEEE80211_RADIOTAP_EHT_USIG1_MU_B20_B24_DISREGARD = 0x0000001f, + IEEE80211_RADIOTAP_EHT_USIG1_MU_B25_VALIDATE = 0x00000020, + /* MU-USIG-2 */ + IEEE80211_RADIOTAP_EHT_USIG2_MU_B0_B1_PPDU_TYPE = 0x000000c0, + IEEE80211_RADIOTAP_EHT_USIG2_MU_B2_VALIDATE = 0x00000100, + IEEE80211_RADIOTAP_EHT_USIG2_MU_B3_B7_PUNCTURED_INFO = 0x00003e00, + IEEE80211_RADIOTAP_EHT_USIG2_MU_B8_VALIDATE = 0x00004000, + IEEE80211_RADIOTAP_EHT_USIG2_MU_B9_B10_SIG_MCS = 0x00018000, + 
IEEE80211_RADIOTAP_EHT_USIG2_MU_B11_B15_EHT_SIG_SYMBOLS = 0x003e0000, + IEEE80211_RADIOTAP_EHT_USIG2_MU_B16_B19_CRC = 0x03c00000, + IEEE80211_RADIOTAP_EHT_USIG2_MU_B20_B25_TAIL = 0xfc000000, +}; + +enum ieee80211_radiotap_eht_usig_tb { + /* TB-USIG-1 */ + IEEE80211_RADIOTAP_EHT_USIG1_TB_B20_B25_DISREGARD = 0x0000001f, + + /* TB-USIG-2 */ + IEEE80211_RADIOTAP_EHT_USIG2_TB_B0_B1_PPDU_TYPE = 0x000000c0, + IEEE80211_RADIOTAP_EHT_USIG2_TB_B2_VALIDATE = 0x00000100, + IEEE80211_RADIOTAP_EHT_USIG2_TB_B3_B6_SPATIAL_REUSE_1 = 0x00001e00, + IEEE80211_RADIOTAP_EHT_USIG2_TB_B7_B10_SPATIAL_REUSE_2 = 0x0001e000, + IEEE80211_RADIOTAP_EHT_USIG2_TB_B11_B15_DISREGARD = 0x003e0000, + IEEE80211_RADIOTAP_EHT_USIG2_TB_B16_B19_CRC = 0x03c00000, + IEEE80211_RADIOTAP_EHT_USIG2_TB_B20_B25_TAIL = 0xfc000000, +}; + /** * ieee80211_get_radiotap_len - get radiotap header length */ -- GitLab From 9179dff82598ab8b4e88dcc93c9e26a2594efd1a Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Wed, 1 Mar 2023 12:09:35 +0200 Subject: [PATCH 0166/2078] wifi: mac80211: add support for driver adding radiotap TLVs The new TLV format enables adding TLVs after the fixed fields in radiotap, as part of the radiotap header. Support this and move vendor data to the TLV format, allowing a reuse of the RX_FLAG_RADIOTAP_VENDOR_DATA as the new RX_FLAG_RADIOTAP_TLV_AT_END flag. Signed-off-by: Mordechay Goodstein Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230301115906.b18fd5da8477.I576400ec40a7b35ef97a3b09a99b3a49e9174786@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 27 +++--- drivers/net/wireless/mac80211_hwsim.c | 45 +++++----- include/net/ieee80211_radiotap.h | 20 +++++ include/net/mac80211.h | 44 ++-------- net/mac80211/rx.c | 82 ++++++------------- 5 files changed, 91 insertions(+), 127 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 549dbe0be223a..d1769464d75bc 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -209,33 +209,34 @@ static void iwl_mvm_add_rtap_sniffer_config(struct iwl_mvm *mvm, struct sk_buff *skb) { struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb); - struct ieee80211_vendor_radiotap *radiotap; - const int size = sizeof(*radiotap) + sizeof(__le16); + struct ieee80211_radiotap_vendor_tlv *radiotap; + const u16 vendor_data_len = sizeof(mvm->cur_aid); + const u16 padding = ALIGN(vendor_data_len, 4) - vendor_data_len; if (!mvm->cur_aid) return; - /* ensure alignment */ - BUILD_BUG_ON((size + 2) % 4); + radiotap = skb_put(skb, sizeof(*radiotap) + vendor_data_len + padding); + radiotap->type = cpu_to_le16(IEEE80211_RADIOTAP_VENDOR_NAMESPACE); + radiotap->len = cpu_to_le16(sizeof(*radiotap) - + sizeof(struct ieee80211_radiotap_tlv) + + vendor_data_len); - radiotap = skb_put(skb, size + 2); - radiotap->align = 1; /* Intel OUI */ radiotap->oui[0] = 0xf6; radiotap->oui[1] = 0x54; radiotap->oui[2] = 0x25; /* radiotap sniffer config sub-namespace */ - radiotap->subns = 1; - radiotap->present = 0x1; - radiotap->len = size - sizeof(*radiotap); - radiotap->pad = 2; - + radiotap->oui_subtype = 1; + radiotap->vendor_type = 0; + /* clear reserved field */ + radiotap->reserved = 0; /* fill the data now */ memcpy(radiotap->data, &mvm->cur_aid, sizeof(mvm->cur_aid)); /* and clear the padding */ - memset(radiotap->data + sizeof(__le16), 0, radiotap->pad); + memset(radiotap->data + vendor_data_len, 0, padding); - 
rx_status->flag |= RX_FLAG_RADIOTAP_VENDOR_DATA; + rx_status->flag |= RX_FLAG_RADIOTAP_TLV_AT_END; } /* iwl_mvm_pass_packet_to_mac80211 - passes the packet for mac80211 */ diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index e9b9340a97dcc..152617034d19e 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -1534,37 +1534,38 @@ static void mac80211_hwsim_add_vendor_rtap(struct sk_buff *skb) * the values accordingly. */ #ifdef HWSIM_RADIOTAP_OUI - struct ieee80211_vendor_radiotap *rtap; + struct ieee80211_radiotap_vendor_tlv *rtap; + static const char vendor_data[8] = "ABCDEFGH"; + + // Make sure no padding is needed + BUILD_BUG_ON(sizeof(vendor_data) % 4); + /* this is last radiotap info before the mac header, so + * skb_reset_mac_header for mac8022 to know the end of + * the radiotap TLV/beginning of the 802.11 header + */ + skb_reset_mac_header(skb); /* * Note that this code requires the headroom in the SKB * that was allocated earlier. */ - rtap = skb_push(skb, sizeof(*rtap) + 8 + 4); + rtap = skb_push(skb, sizeof(*rtap) + sizeof(vendor_data)); + + rtap->len = cpu_to_le16(sizeof(*rtap) - + sizeof(struct ieee80211_radiotap_tlv) + + sizeof(vendor_data)); + rtap->type = cpu_to_le16(IEEE80211_RADIOTAP_VENDOR_NAMESPACE); + rtap->oui[0] = HWSIM_RADIOTAP_OUI[0]; rtap->oui[1] = HWSIM_RADIOTAP_OUI[1]; rtap->oui[2] = HWSIM_RADIOTAP_OUI[2]; - rtap->subns = 127; + rtap->oui_subtype = 127; + /* clear reserved field */ + rtap->reserved = 0; + rtap->vendor_type = 0; + memcpy(rtap->data, vendor_data, sizeof(vendor_data)); - /* - * Radiotap vendor namespaces can (and should) also be - * split into fields by using the standard radiotap - * presence bitmap mechanism. Use just BIT(0) here for - * the presence bitmap. 
- */ - rtap->present = BIT(0); - /* We have 8 bytes of (dummy) data */ - rtap->len = 8; - /* For testing, also require it to be aligned */ - rtap->align = 8; - /* And also test that padding works, 4 bytes */ - rtap->pad = 4; - /* push the data */ - memcpy(rtap->data, "ABCDEFGH", 8); - /* make sure to clear padding, mac80211 doesn't */ - memset(rtap->data + 8, 0, 4); - - IEEE80211_SKB_RXCB(skb)->flag |= RX_FLAG_RADIOTAP_VENDOR_DATA; + IEEE80211_SKB_RXCB(skb)->flag |= RX_FLAG_RADIOTAP_TLV_AT_END; #endif } diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index 0fc2667a9a5d2..95436686d3fe6 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -369,6 +369,26 @@ struct ieee80211_radiotap_tlv { u8 data[]; } __packed; +/** + * struct ieee80211_radiotap_vendor_tlv - vendor radiotap data information + * @type: should always be set to IEEE80211_RADIOTAP_VENDOR_NAMESPACE + * @len: length of data + * @oui: radiotap vendor namespace OUI + * @oui_subtype: radiotap vendor sub namespace + * @vendor_type: radiotap vendor type + * @reserved: should always be set to zero (to avoid leaking memory) + * @data: the actual vendor namespace data + */ +struct ieee80211_radiotap_vendor_tlv { + __le16 type; /* IEEE80211_RADIOTAP_VENDOR_NAMESPACE */ + __le16 len; + u8 oui[3]; + u8 oui_subtype; + __le16 vendor_type; + __le16 reserved; + u8 data[]; +} __packed; + /* ieee80211_radiotap_eht_usig - content of U-SIG tlv (type 33) * see www.radiotap.org/fields/U-SIG.html for details */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 5df9eb828a581..3a43ce5fd4ec8 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1375,9 +1375,12 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * subframes share the same sequence number. Reported subframes can be * either regular MSDU or singly A-MSDUs. Subframes must not be * interleaved with other frames. - * @RX_FLAG_RADIOTAP_VENDOR_DATA: This frame contains vendor-specific - * radiotap data in the skb->data (before the frame) as described by - * the &struct ieee80211_vendor_radiotap. + * @RX_FLAG_RADIOTAP_TLV_AT_END: This frame contains radiotap TLVs in the + * skb->data (before the 802.11 header). + * If used, the SKB's mac_header pointer must be set to point + * to the 802.11 header after the TLVs, and any padding added after TLV + * data to align to 4 must be cleared by the driver putting the TLVs + * in the skb. * @RX_FLAG_ALLOW_SAME_PN: Allow the same PN as same packet before. * This is used for AMSDU subframes which can have the same PN as * the first subframe. @@ -1429,7 +1432,7 @@ enum mac80211_rx_flags { RX_FLAG_ONLY_MONITOR = BIT(17), RX_FLAG_SKIP_MONITOR = BIT(18), RX_FLAG_AMSDU_MORE = BIT(19), - RX_FLAG_RADIOTAP_VENDOR_DATA = BIT(20), + RX_FLAG_RADIOTAP_TLV_AT_END = BIT(20), RX_FLAG_MIC_STRIPPED = BIT(21), RX_FLAG_ALLOW_SAME_PN = BIT(22), RX_FLAG_ICV_STRIPPED = BIT(23), @@ -1569,39 +1572,6 @@ ieee80211_rx_status_to_khz(struct ieee80211_rx_status *rx_status) (rx_status->freq_offset ? 500 : 0); } -/** - * struct ieee80211_vendor_radiotap - vendor radiotap data information - * @present: presence bitmap for this vendor namespace - * (this could be extended in the future if any vendor needs more - * bits, the radiotap spec does allow for that) - * @align: radiotap vendor namespace alignment. 
This defines the needed - * alignment for the @data field below, not for the vendor namespace - * description itself (which has a fixed 2-byte alignment) - * Must be a power of two, and be set to at least 1! - * @oui: radiotap vendor namespace OUI - * @subns: radiotap vendor sub namespace - * @len: radiotap vendor sub namespace skip length, if alignment is done - * then that's added to this, i.e. this is only the length of the - * @data field. - * @pad: number of bytes of padding after the @data, this exists so that - * the skb data alignment can be preserved even if the data has odd - * length - * @data: the actual vendor namespace data - * - * This struct, including the vendor data, goes into the skb->data before - * the 802.11 header. It's split up in mac80211 using the align/oui/subns - * data. - */ -struct ieee80211_vendor_radiotap { - u32 present; - u8 align; - u8 oui[3]; - u8 subns; - u8 pad; - u16 len; - u8 data[]; -} __packed; - /** * enum ieee80211_conf_flags - configuration flags * diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index f63ed6b91d9b5..0255c5745e1ce 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -55,7 +55,7 @@ static struct sk_buff *ieee80211_clean_skb(struct sk_buff *skb, /* After pulling radiotap header, clear all flags that indicate * info in skb->data. */ - status->flag &= ~(RX_FLAG_RADIOTAP_VENDOR_DATA | + status->flag &= ~(RX_FLAG_RADIOTAP_TLV_AT_END | RX_FLAG_RADIOTAP_LSIG | RX_FLAG_RADIOTAP_HE_MU | RX_FLAG_RADIOTAP_HE); @@ -126,9 +126,6 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local, /* allocate extra bitmaps */ if (status->chains) len += 4 * hweight8(status->chains); - /* vendor presence bitmap */ - if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA) - len += 4; if (ieee80211_have_rx_timestamp(status)) { len = ALIGN(len, 8); @@ -190,34 +187,28 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local, len += 2 * hweight8(status->chains); } - if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA) { - struct ieee80211_vendor_radiotap *rtap; - int vendor_data_offset = 0; + if (status->flag & RX_FLAG_RADIOTAP_TLV_AT_END) { + int tlv_offset = 0; /* * The position to look at depends on the existence (or non- * existence) of other elements, so take that into account... 
*/ if (status->flag & RX_FLAG_RADIOTAP_HE) - vendor_data_offset += + tlv_offset += sizeof(struct ieee80211_radiotap_he); if (status->flag & RX_FLAG_RADIOTAP_HE_MU) - vendor_data_offset += + tlv_offset += sizeof(struct ieee80211_radiotap_he_mu); if (status->flag & RX_FLAG_RADIOTAP_LSIG) - vendor_data_offset += + tlv_offset += sizeof(struct ieee80211_radiotap_lsig); - rtap = (void *)&skb->data[vendor_data_offset]; + /* ensure 4 byte alignment for TLV */ + len = ALIGN(len, 4); - /* alignment for fixed 6-byte vendor data header */ - len = ALIGN(len, 2); - /* vendor data header */ - len += 6; - if (WARN_ON(rtap->align == 0)) - rtap->align = 1; - len = ALIGN(len, rtap->align); - len += rtap->len + rtap->pad; + /* TLVs until the mac header */ + len += skb_mac_header(skb) - &skb->data[tlv_offset]; } return len; @@ -313,9 +304,9 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, u32 it_present_val; u16 rx_flags = 0; u16 channel_flags = 0; + u32 tlvs_len = 0; int mpdulen, chain; unsigned long chains = status->chains; - struct ieee80211_vendor_radiotap rtap = {}; struct ieee80211_radiotap_he he = {}; struct ieee80211_radiotap_he_mu he_mu = {}; struct ieee80211_radiotap_lsig lsig = {}; @@ -336,18 +327,17 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, skb_pull(skb, sizeof(lsig)); } - if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA) { - rtap = *(struct ieee80211_vendor_radiotap *)skb->data; - /* rtap.len and rtap.pad are undone immediately */ - skb_pull(skb, sizeof(rtap) + rtap.len + rtap.pad); + if (status->flag & RX_FLAG_RADIOTAP_TLV_AT_END) { + /* data is pointer at tlv all other info was pulled off */ + tlvs_len = skb_mac_header(skb) - skb->data; } mpdulen = skb->len; if (!(has_fcs && ieee80211_hw_check(&local->hw, RX_INCLUDES_FCS))) mpdulen += FCS_LEN; - rthdr = skb_push(skb, rtap_len); - memset(rthdr, 0, rtap_len - rtap.len - rtap.pad); + rthdr = skb_push(skb, rtap_len - tlvs_len); + memset(rthdr, 0, rtap_len - tlvs_len); it_present = &rthdr->it_present; /* radiotap header, set always present flags */ @@ -369,13 +359,8 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, BIT(IEEE80211_RADIOTAP_DBM_ANTSIGNAL); } - if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA) { - it_present_val |= BIT(IEEE80211_RADIOTAP_VENDOR_NAMESPACE) | - BIT(IEEE80211_RADIOTAP_EXT); - put_unaligned_le32(it_present_val, it_present); - it_present++; - it_present_val = rtap.present; - } + if (status->flag & RX_FLAG_RADIOTAP_TLV_AT_END) + it_present_val |= BIT(IEEE80211_RADIOTAP_TLV); put_unaligned_le32(it_present_val, it_present); @@ -706,22 +691,6 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, *pos++ = status->chain_signal[chain]; *pos++ = chain; } - - if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA) { - /* ensure 2 byte alignment for the vendor field as required */ - if ((pos - (u8 *)rthdr) & 1) - *pos++ = 0; - *pos++ = rtap.oui[0]; - *pos++ = rtap.oui[1]; - *pos++ = rtap.oui[2]; - *pos++ = rtap.subns; - put_unaligned_le16(rtap.len, pos); - pos += 2; - /* align the actual payload as requested */ - while ((pos - (u8 *)rthdr) & (rtap.align - 1)) - *pos++ = 0; - /* data (and possible padding) already follows */ - } } static struct sk_buff * @@ -797,6 +766,13 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, bool only_monitor = false; unsigned int min_head_len; + if (WARN_ON_ONCE(status->flag & RX_FLAG_RADIOTAP_TLV_AT_END && + !skb_mac_header_was_set(origskb))) { + /* with this skb no way to know where frame payload starts */ + 
dev_kfree_skb(origskb); + return NULL; + } + if (status->flag & RX_FLAG_RADIOTAP_HE) rtap_space += sizeof(struct ieee80211_radiotap_he); @@ -806,12 +782,8 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, if (status->flag & RX_FLAG_RADIOTAP_LSIG) rtap_space += sizeof(struct ieee80211_radiotap_lsig); - if (unlikely(status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA)) { - struct ieee80211_vendor_radiotap *rtap = - (void *)(origskb->data + rtap_space); - - rtap_space += sizeof(*rtap) + rtap->len + rtap->pad; - } + if (status->flag & RX_FLAG_RADIOTAP_TLV_AT_END) + rtap_space += skb_mac_header(origskb) - &origskb->data[rtap_space]; min_head_len = rtap_space; -- GitLab From 5383bfff5261422f843de72a4089da9b152291b0 Mon Sep 17 00:00:00 2001 From: Ryder Lee Date: Sat, 18 Feb 2023 01:50:05 +0800 Subject: [PATCH 0167/2078] wifi: mac80211: introduce ieee80211_refresh_tx_agg_session_timer() This allows low level drivers to refresh the tx agg session timer, based on querying stats from the firmware usually. Especially for some mt76 devices support .net_fill_forward_path would bypass mac80211, which leads to tx BA session timeout clients that set a timeout in their AddBA response to our request, even if our request is without a timeout. Signed-off-by: Ryder Lee Link: https://lore.kernel.org/r/7c3f72eac1c34921cd84a462e60d71e125862152.1676616450.git.ryder.lee@mediatek.com [slightly clarify commit message, add note about RCU] Signed-off-by: Johannes Berg --- include/net/mac80211.h | 14 ++++++++++++++ net/mac80211/agg-tx.c | 17 +++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 3a43ce5fd4ec8..722b99e54ad20 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -5973,6 +5973,20 @@ void ieee80211_queue_delayed_work(struct ieee80211_hw *hw, struct delayed_work *dwork, unsigned long delay); +/** + * ieee80211_refresh_tx_agg_session_timer - Refresh a tx agg session timer. + * @sta: the station for which to start a BA session + * @tid: the TID to BA on. + * + * This function allows low level driver to refresh tx agg session timer + * to maintain BA session, the session level will still be managed by the + * mac80211. + * + * Note: must be called in an RCU critical section. + */ +void ieee80211_refresh_tx_agg_session_timer(struct ieee80211_sta *sta, + u16 tid); + /** * ieee80211_start_tx_ba_session - Start a tx Block Ack session. * @sta: the station for which to start a BA session diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index f9514bacbd4a1..3b651e7f5a73b 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -554,6 +554,23 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) ieee80211_send_addba_with_timeout(sta, tid_tx); } +void ieee80211_refresh_tx_agg_session_timer(struct ieee80211_sta *pubsta, + u16 tid) +{ + struct sta_info *sta = container_of(pubsta, struct sta_info, sta); + struct tid_ampdu_tx *tid_tx; + + if (WARN_ON_ONCE(tid >= IEEE80211_NUM_TIDS)) + return; + + tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]); + if (!tid_tx) + return; + + tid_tx->last_tx = jiffies; +} +EXPORT_SYMBOL(ieee80211_refresh_tx_agg_session_timer); + /* * After accepting the AddBA Response we activated a timer, * resetting it after each frame that we send. 
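As a usage illustration (not part of the patch), a driver that learns from firmware statistics that frames were recently transmitted on a given TID could keep the BA session alive roughly as sketched here. The function and variable names are hypothetical; only ieee80211_refresh_tx_agg_session_timer() and ieee80211_find_sta() are real mac80211 symbols, and the call is made under rcu_read_lock() as required by the kernel-doc note above.

#include <net/mac80211.h>

/* Hypothetical driver helper: refresh the tx BA session timer for a peer
 * after the firmware reported recent tx activity on @tid.
 */
static void example_refresh_ba_timer(struct ieee80211_vif *vif,
				     const u8 *peer_addr, u16 tid)
{
	struct ieee80211_sta *sta;

	rcu_read_lock();
	sta = ieee80211_find_sta(vif, peer_addr);
	if (sta)
		ieee80211_refresh_tx_agg_session_timer(sta, tid);
	rcu_read_unlock();
}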
-- GitLab From f4d1181e4759c9c6c97c86cda2cf2d1ddb6a74d2 Mon Sep 17 00:00:00 2001 From: Ryder Lee Date: Sat, 18 Feb 2023 01:48:59 +0800 Subject: [PATCH 0168/2078] wifi: mac80211: add EHT MU-MIMO related flags in ieee80211_bss_conf Similar to VHT/HE. This is utilized to pass MU-MIMO configurations from user space (i.e. hostapd) to driver. Signed-off-by: Ryder Lee Link: https://lore.kernel.org/r/8d9966c4c1e77cb1ade77d42bdc49905609192e9.1676628065.git.ryder.lee@mediatek.com [move into combined if statement, reset on !eht] Signed-off-by: Johannes Berg --- include/net/mac80211.h | 9 +++++++++ net/mac80211/cfg.c | 16 ++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 722b99e54ad20..410cd3daaa59d 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -674,6 +674,12 @@ struct ieee80211_fils_discovery { * @he_full_ul_mumimo: does this BSS support the reception (AP) or transmission * (non-AP STA) of an HE TB PPDU on an RU that spans the entire PPDU * bandwidth + * @eht_su_beamformer: in AP-mode, does this BSS enable operation as an EHT SU + * beamformer + * @eht_su_beamformee: in AP-mode, does this BSS enable operation as an EHT SU + * beamformee + * @eht_mu_beamformer: in AP-mode, does this BSS enable operation as an EHT MU + * beamformer */ struct ieee80211_bss_conf { struct ieee80211_vif *vif; @@ -761,6 +767,9 @@ struct ieee80211_bss_conf { bool he_su_beamformee; bool he_mu_beamformer; bool he_full_ul_mumimo; + bool eht_su_beamformer; + bool eht_su_beamformee; + bool eht_mu_beamformer; }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 2c8fa19c2f248..9789008626a50 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1299,6 +1299,22 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, if (params->eht_cap) { link_conf->eht_puncturing = params->punct_bitmap; changed |= BSS_CHANGED_EHT_PUNCTURING; + + link_conf->eht_su_beamformer = + params->eht_cap->fixed.phy_cap_info[0] & + IEEE80211_EHT_PHY_CAP0_SU_BEAMFORMER; + link_conf->eht_su_beamformee = + params->eht_cap->fixed.phy_cap_info[0] & + IEEE80211_EHT_PHY_CAP0_SU_BEAMFORMEE; + link_conf->eht_mu_beamformer = + params->eht_cap->fixed.phy_cap_info[7] & + (IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_80MHZ | + IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_160MHZ | + IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_320MHZ); + } else { + link_conf->eht_su_beamformer = false; + link_conf->eht_su_beamformee = false; + link_conf->eht_mu_beamformer = false; } if (sdata->vif.type == NL80211_IFTYPE_AP && -- GitLab From 2ad7dd9425408bf0ca524102808059c05bad169e Mon Sep 17 00:00:00 2001 From: Ryder Lee Date: Sat, 18 Feb 2023 01:49:25 +0800 Subject: [PATCH 0169/2078] wifi: mac80211: add LDPC related flags in ieee80211_bss_conf This is utilized to pass LDPC configurations from user space (i.e. hostapd) to driver. Signed-off-by: Ryder Lee Link: https://lore.kernel.org/r/1de696aaa34efd77a926eb657b8c0fda05aaa177.1676628065.git.ryder.lee@mediatek.com Signed-off-by: Johannes Berg --- include/net/mac80211.h | 6 ++++++ net/mac80211/cfg.c | 11 +++++++++++ 2 files changed, 17 insertions(+) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 410cd3daaa59d..f12edca660ba0 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -657,6 +657,9 @@ struct ieee80211_fils_discovery { * write-protected by sdata_lock and local->mtx so holding either is fine * for read access. * @color_change_color: the bss color that will be used after the change. 
+ * @ht_ldpc: in AP mode, indicates interface has HT LDPC capability. + * @vht_ldpc: in AP mode, indicates interface has VHT LDPC capability. + * @he_ldpc: in AP mode, indicates interface has HE LDPC capability. * @vht_su_beamformer: in AP mode, does this BSS support operation as an VHT SU * beamformer * @vht_su_beamformee: in AP mode, does this BSS support operation as an VHT SU @@ -759,6 +762,9 @@ struct ieee80211_bss_conf { bool color_change_active; u8 color_change_color; + bool ht_ldpc; + bool vht_ldpc; + bool he_ldpc; bool vht_su_beamformer; bool vht_su_beamformee; bool vht_mu_beamformer; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 9789008626a50..760ad934f9e12 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1252,7 +1252,15 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, prev_beacon_int = link_conf->beacon_int; link_conf->beacon_int = params->beacon_interval; + if (params->ht_cap) + link_conf->ht_ldpc = + params->ht_cap->cap_info & + cpu_to_le16(IEEE80211_HT_CAP_LDPC_CODING); + if (params->vht_cap) { + link_conf->vht_ldpc = + params->vht_cap->vht_cap_info & + cpu_to_le32(IEEE80211_VHT_CAP_RXLDPC); link_conf->vht_su_beamformer = params->vht_cap->vht_cap_info & cpu_to_le32(IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE); @@ -1282,6 +1290,9 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, } if (params->he_cap) { + link_conf->he_ldpc = + params->he_cap->phy_cap_info[1] & + IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD; link_conf->he_su_beamformer = params->he_cap->phy_cap_info[3] & IEEE80211_HE_PHY_CAP3_SU_BEAMFORMER; -- GitLab From 6933486133ecf71bbe273d7ac72cfc4a51286af3 Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Thu, 12 Jan 2023 06:54:13 +0530 Subject: [PATCH 0170/2078] wifi: nl80211: Add support for randomizing TA of auth and deauth frames Add support to use a random local address in authentication and deauthentication frames sent to unassociated peer when the driver supports. The driver needs to configure receive behavior to accept frames with random transmit address specified in TX path authentication frames during the time of the frame exchange is pending and such frames need to be acknowledged similarly to frames sent to the local permanent address when this random address functionality is used. This capability allows use of randomized transmit address for PASN authentication frames to improve privacy of WLAN clients. Signed-off-by: Veerendranath Jakkam Link: https://lore.kernel.org/r/20230112012415.167556-2-quic_vjakkam@quicinc.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 5 ++++ net/wireless/mlme.c | 55 +++++++++++++++++++++++------------- 2 files changed, 41 insertions(+), 19 deletions(-) diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index c8520c150f9c4..9a0ac0363f1fc 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -6348,6 +6348,10 @@ enum nl80211_feature_flags { * @NL80211_EXT_FEATURE_SECURE_NAN: Device supports NAN Pairing which enables * authentication, data encryption and message integrity. * + * @NL80211_EXT_FEATURE_AUTH_AND_DEAUTH_RANDOM_TA: Device supports randomized TA + * in authentication and deauthentication frames sent to unassociated peer + * using @NL80211_CMD_FRAME. + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. 
*/ @@ -6418,6 +6422,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_POWERED_ADDR_CHANGE, NL80211_EXT_FEATURE_PUNCT, NL80211_EXT_FEATURE_SECURE_NAN, + NL80211_EXT_FEATURE_AUTH_AND_DEAUTH_RANDOM_TA, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 81d3f40d62357..ac059cefbeb39 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -673,6 +673,39 @@ static bool cfg80211_allowed_address(struct wireless_dev *wdev, const u8 *addr) return ether_addr_equal(addr, wdev_address(wdev)); } +static bool cfg80211_allowed_random_address(struct wireless_dev *wdev, + const struct ieee80211_mgmt *mgmt) +{ + if (ieee80211_is_auth(mgmt->frame_control) || + ieee80211_is_deauth(mgmt->frame_control)) { + /* Allow random TA to be used with authentication and + * deauthentication frames if the driver has indicated support. + */ + if (wiphy_ext_feature_isset( + wdev->wiphy, + NL80211_EXT_FEATURE_AUTH_AND_DEAUTH_RANDOM_TA)) + return true; + } else if (ieee80211_is_action(mgmt->frame_control) && + mgmt->u.action.category == WLAN_CATEGORY_PUBLIC) { + /* Allow random TA to be used with Public Action frames if the + * driver has indicated support. + */ + if (!wdev->connected && + wiphy_ext_feature_isset( + wdev->wiphy, + NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA)) + return true; + + if (wdev->connected && + wiphy_ext_feature_isset( + wdev->wiphy, + NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA_CONNECTED)) + return true; + } + + return false; +} + int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, struct cfg80211_mgmt_tx_params *params, u64 *cookie) @@ -774,25 +807,9 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, return err; } - if (!cfg80211_allowed_address(wdev, mgmt->sa)) { - /* Allow random TA to be used with Public Action frames if the - * driver has indicated support for this. Otherwise, only allow - * the local address to be used. - */ - if (!ieee80211_is_action(mgmt->frame_control) || - mgmt->u.action.category != WLAN_CATEGORY_PUBLIC) - return -EINVAL; - if (!wdev->connected && - !wiphy_ext_feature_isset( - &rdev->wiphy, - NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA)) - return -EINVAL; - if (wdev->connected && - !wiphy_ext_feature_isset( - &rdev->wiphy, - NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA_CONNECTED)) - return -EINVAL; - } + if (!cfg80211_allowed_address(wdev, mgmt->sa) && + !cfg80211_allowed_random_address(wdev, mgmt)) + return -EINVAL; /* Transmit the management frame as requested by user space */ return rdev_mgmt_tx(rdev, wdev, params, cookie); -- GitLab From 2d5bcdcda8799cf21f9ab84598c946dd320207a2 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 7 Mar 2023 08:14:06 -0700 Subject: [PATCH 0171/2078] bpf: Increase size of BTF_ID_LIST without CONFIG_DEBUG_INFO_BTF again After commit 66e3a13e7c2c ("bpf: Add bpf_dynptr_slice and bpf_dynptr_slice_rdwr"), clang builds without CONFIG_DEBUG_INFO_BTF warn: kernel/bpf/verifier.c:10298:24: warning: array index 16 is past the end of the array (that has type 'u32[16]' (aka 'unsigned int[16]')) [-Warray-bounds] meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) { ^ ~~~~~~~~~~~~~~~~~~~~~~~~ kernel/bpf/verifier.c:9150:1: note: array 'special_kfunc_list' declared here BTF_ID_LIST(special_kfunc_list) ^ include/linux/btf_ids.h:207:27: note: expanded from macro 'BTF_ID_LIST' #define BTF_ID_LIST(name) static u32 __maybe_unused name[16]; ^ 1 warning generated. 
A warning of this nature was previously addressed by commit beb3d47d1d3d ("bpf: Fix a BTF_ID_LIST bug with CONFIG_DEBUG_INFO_BTF not set") but there have been new kfuncs added since then. Quadruple the size of the CONFIG_DEBUG_INFO_BTF=n definition so that this problem is unlikely to show up for some time. Link: https://github.com/ClangBuiltLinux/linux/issues/1810 Signed-off-by: Nathan Chancellor Reviewed-by: Tom Rix Link: https://lore.kernel.org/r/20230307-bpf-kfuncs-warray-bounds-v1-1-00ad3191f3a6@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/btf_ids.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index 3a4f7cd882cac..00950cc03bff2 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -204,7 +204,7 @@ extern struct btf_id_set8 name; #else -#define BTF_ID_LIST(name) static u32 __maybe_unused name[16]; +#define BTF_ID_LIST(name) static u32 __maybe_unused name[64]; #define BTF_ID(prefix, name) #define BTF_ID_FLAGS(prefix, name, ...) #define BTF_ID_UNUSED -- GitLab From 14e05beb10326eeb997ec5ebbf10edf08e85c3f8 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 28 Feb 2023 08:28:27 -0800 Subject: [PATCH 0172/2078] wifi: nl80211: convert cfg80211_scan_request allocation to *_size macros The cfg80211_scan_request structure is followed by a flexible array member as well as several other arrays that are then stored into pointers in the structure. These are allocated currently using a simple sequence of multiplications. Replace the calculations with struct_size(), size_add(), and array_size() helper macros. These macros saturate the calculation at SIZE_MAX rather than overflowing. Note that we can't use flex_array_size() instead of array_size() because the fields are not arrays, but simple pointers. 
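For readers less familiar with these helpers, a minimal userspace sketch of the saturating behaviour they provide is shown below. It is not the kernel's overflow.h implementation, and the sizes are illustrative stand-ins only; the point is that an attacker-controlled count can no longer wrap the sum into a small allocation.

/* sat_add()/sat_mul(): toy analogs of size_add()/array_size() that pin
 * the result at SIZE_MAX instead of silently wrapping around.
 */
#include <stdint.h>
#include <stdio.h>

static size_t sat_add(size_t a, size_t b)
{
        return (b > SIZE_MAX - a) ? SIZE_MAX : a + b;
}

static size_t sat_mul(size_t n, size_t elem)
{
        return (elem && n > SIZE_MAX / elem) ? SIZE_MAX : n * elem;
}

int main(void)
{
        size_t base = 256;                 /* stand-in for sizeof(*request) */
        size_t n_ssids = 4, ssid_sz = 36;  /* illustrative element sizes */
        size_t ie_len = 128;
        size_t size;

        size = sat_add(base, sat_mul(n_ssids, ssid_sz));
        size = sat_add(size, ie_len);
        printf("allocation size: %zu\n", size);

        /* overflow attempt saturates, so the later allocation simply fails */
        printf("overflow case:   %zu\n", sat_mul(SIZE_MAX / 2, 3));
        return 0;
}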
Signed-off-by: Jacob Keller Link: https://lore.kernel.org/r/20230228162827.3876606-3-jacob.e.keller@intel.com Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 85f714e1af871..0a31b1d2845d5 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -9027,7 +9027,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) struct nlattr *attr; struct wiphy *wiphy; int err, tmp, n_ssids = 0, n_channels, i; - size_t ie_len; + size_t ie_len, size; wiphy = &rdev->wiphy; @@ -9072,10 +9072,10 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) if (ie_len > wiphy->max_scan_ie_len) return -EINVAL; - request = kzalloc(sizeof(*request) - + sizeof(*request->ssids) * n_ssids - + sizeof(*request->channels) * n_channels - + ie_len, GFP_KERNEL); + size = struct_size(request, channels, n_channels); + size = size_add(size, array_size(sizeof(*request->ssids), n_ssids)); + size = size_add(size, ie_len); + request = kzalloc(size, GFP_KERNEL); if (!request) return -ENOMEM; @@ -9408,7 +9408,7 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, struct nlattr *attr; int err, tmp, n_ssids = 0, n_match_sets = 0, n_channels, i, n_plans = 0; enum nl80211_band band; - size_t ie_len; + size_t ie_len, size; struct nlattr *tb[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1]; s32 default_match_rssi = NL80211_SCAN_RSSI_THOLD_OFF; @@ -9517,12 +9517,14 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, attrs[NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST])) return ERR_PTR(-EINVAL); - request = kzalloc(sizeof(*request) - + sizeof(*request->ssids) * n_ssids - + sizeof(*request->match_sets) * n_match_sets - + sizeof(*request->scan_plans) * n_plans - + sizeof(*request->channels) * n_channels - + ie_len, GFP_KERNEL); + size = struct_size(request, channels, n_channels); + size = size_add(size, array_size(sizeof(*request->ssids), n_ssids)); + size = size_add(size, array_size(sizeof(*request->match_sets), + n_match_sets)); + size = size_add(size, array_size(sizeof(*request->scan_plans), + n_plans)); + size = size_add(size, ie_len); + request = kzalloc(size, GFP_KERNEL); if (!request) return ERR_PTR(-ENOMEM); -- GitLab From eccfe0176cc11432f1bc7e8b035164d6263df715 Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Sun, 5 Mar 2023 14:16:15 +0200 Subject: [PATCH 0173/2078] wifi: iwlwifi: mvm: add LSIG info to radio tap info in EHT Nothing change with LSIG in EHT so just extend the switch case to include EHT standard. 
Signed-off-by: Mordechay Goodstein Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230305124406.53edcfc049fb.Iec0401faead108aa5130c23c604cfb88ef2943de@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index d1769464d75bc..3d7717fdb4d5e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -1594,6 +1594,10 @@ static void iwl_mvm_decode_lsig(struct sk_buff *skb, case IWL_RX_PHY_INFO_TYPE_HE_MU: case IWL_RX_PHY_INFO_TYPE_HE_MU_EXT: case IWL_RX_PHY_INFO_TYPE_HE_TB: + case IWL_RX_PHY_INFO_TYPE_EHT_MU: + case IWL_RX_PHY_INFO_TYPE_EHT_TB: + case IWL_RX_PHY_INFO_TYPE_EHT_MU_EXT: + case IWL_RX_PHY_INFO_TYPE_EHT_TB_EXT: lsig = skb_put(skb, sizeof(*lsig)); lsig->data1 = cpu_to_le16(IEEE80211_RADIOTAP_LSIG_DATA1_LENGTH_KNOWN); lsig->data2 = le16_encode_bits(le32_get_bits(phy_data->d1, -- GitLab From f567b9b46c9e8ecc12190a109ed16006c9f441fa Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Sun, 5 Mar 2023 14:16:16 +0200 Subject: [PATCH 0174/2078] wifi: iwlwifi: mvm: mark mac header with no data frames Although no data is presented in the skb, but upper layers need it for calculating where radio tap header are done, so we mark it. Signed-off-by: Mordechay Goodstein Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230305124406.8f53cc8579bf.I8819c005b4953cea03346d0aff82b31d94f79fe5@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 3d7717fdb4d5e..ba9f79ada82f7 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -2147,11 +2147,8 @@ void iwl_mvm_rx_monitor_no_data(struct iwl_mvm *mvm, struct napi_struct *napi, * * We mark it as mac header, for upper layers to know where * all radio tap header ends. - * - * Since data doesn't move data while putting data on skb and that is - * the only way we use, data + len is the next place that hdr would be put */ - skb_set_mac_header(skb, skb->len); + skb_reset_mac_header(skb); /* * Override the nss from the rx_vec since the rate_n_flags has -- GitLab From 7f165fdf2967a5b7aa4b123cc9586e9f5068325b Mon Sep 17 00:00:00 2001 From: Mukesh Sisodiya Date: Sun, 5 Mar 2023 14:16:17 +0200 Subject: [PATCH 0175/2078] wifi: iwlwifi: Adding the code to get RF name for MsP device Add missing RF name extraction for MsP device from hardware rf-id. 
Signed-off-by: Mukesh Sisodiya Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230305124406.ae831f21ec55.Iad71cc30f9930f7ebe079d210ebe6c968a159273@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/iwl-csr.h | 1 + drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h index 3e1f011e93aaf..bece76b1a5145 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h @@ -348,6 +348,7 @@ enum { #define CSR_HW_RF_ID_TYPE_HRCDB (0x00109F00) #define CSR_HW_RF_ID_TYPE_GF (0x0010D000) #define CSR_HW_RF_ID_TYPE_GF4 (0x0010E000) +#define CSR_HW_RF_ID_TYPE_MS (0x00111000) /* HW_RF CHIP STEP */ #define CSR_HW_RF_STEP(_val) (((_val) >> 8) & 0xF) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c index 94f40c4d24217..1e263154e9ebe 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* * Copyright (C) 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation */ #include "iwl-trans.h" #include "iwl-prph.h" @@ -277,6 +277,9 @@ static void iwl_pcie_get_rf_name(struct iwl_trans *trans) case CSR_HW_RFID_TYPE(CSR_HW_RF_ID_TYPE_HRCDB): pos = scnprintf(buf, buflen, "HRCDB"); break; + case CSR_HW_RFID_TYPE(CSR_HW_RF_ID_TYPE_MS): + pos = scnprintf(buf, buflen, "MS"); + break; default: return; } -- GitLab From 876882b51569bf686ae4c421b0a738b78a536aa3 Mon Sep 17 00:00:00 2001 From: Golan Ben Ami Date: Sun, 5 Mar 2023 14:16:18 +0200 Subject: [PATCH 0176/2078] wifi: iwlwifi: reduce verbosity of some logging events These are cases in which we'd like to warn that something unexpected happened but they may not be errors. Reduce verbosity. 
Signed-off-by: Golan Ben Ami Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230305124407.5eea0f58a74f.Ifb6b35903a5a452a757bfe50b6a7e58b3fd2ef23@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c | 6 +++--- drivers/net/wireless/intel/iwlwifi/mvm/rx.c | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c index 48e7376a5fea7..8e0bc1f5f6c60 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c @@ -350,9 +350,9 @@ void iwl_dbg_tlv_alloc(struct iwl_trans *trans, const struct iwl_ucode_tlv *tlv, ret = dbg_tlv_alloc[tlv_idx](trans, tlv); if (ret) { - IWL_ERR(trans, - "WRT: Failed to allocate TLV 0x%x, ret %d, (ext=%d)\n", - type, ret, ext); + IWL_WARN(trans, + "WRT: Failed to allocate TLV 0x%x, ret %d, (ext=%d)\n", + type, ret, ext); goto out_err; } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c index 49ca1e168fc5b..ab9a51375c8ac 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c @@ -190,7 +190,7 @@ static u32 iwl_mvm_set_mac80211_rx_flag(struct iwl_mvm *mvm, default: /* Expected in monitor (not having the keys) */ if (!mvm->monitor_on) - IWL_ERR(mvm, "Unhandled alg: 0x%x\n", rx_pkt_status); + IWL_WARN(mvm, "Unhandled alg: 0x%x\n", rx_pkt_status); } return 0; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index ba9f79ada82f7..2db4f68becff2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -444,7 +444,7 @@ static int iwl_mvm_rx_crypto(struct iwl_mvm *mvm, struct ieee80211_sta *sta, */ if (!is_multicast_ether_addr(hdr->addr1) && !mvm->monitor_on && net_ratelimit()) - IWL_ERR(mvm, "Unhandled alg: 0x%x\n", status); + IWL_WARN(mvm, "Unhandled alg: 0x%x\n", status); } return 0; -- GitLab From 90a5527d7686d3ebe0dd2a831356a6c7d7dc31bc Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sun, 5 Mar 2023 12:45:58 +0000 Subject: [PATCH 0177/2078] bpf: add new map ops ->map_mem_usage Add a new map ops ->map_mem_usage to print the memory usage of a bpf map. This is a preparation for the followup change. Signed-off-by: Yafang Shao Link: https://lore.kernel.org/r/20230305124615.12358-2-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 ++ kernel/bpf/syscall.c | 15 +++++++-------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d3456804f7aa6..9059520bbb5e0 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -161,6 +161,8 @@ struct bpf_map_ops { bpf_callback_t callback_fn, void *callback_ctx, u64 flags); + u64 (*map_mem_usage)(const struct bpf_map *map); + /* BTF id of struct allocated by map_alloc */ int *map_btf_id; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index ede5f987484f0..7c96e68859ec4 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -771,16 +771,15 @@ static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f) } #ifdef CONFIG_PROC_FS -/* Provides an approximation of the map's memory footprint. - * Used only to provide a backward compatibility and display - * a reasonable "memlock" info. 
- */ -static unsigned long bpf_map_memory_footprint(const struct bpf_map *map) +/* Show the memory usage of a bpf map */ +static u64 bpf_map_memory_usage(const struct bpf_map *map) { unsigned long size; - size = round_up(map->key_size + bpf_map_value_size(map), 8); + if (map->ops->map_mem_usage) + return map->ops->map_mem_usage(map); + size = round_up(map->key_size + bpf_map_value_size(map), 8); return round_up(map->max_entries * size, PAGE_SIZE); } @@ -803,7 +802,7 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) "max_entries:\t%u\n" "map_flags:\t%#x\n" "map_extra:\t%#llx\n" - "memlock:\t%lu\n" + "memlock:\t%llu\n" "map_id:\t%u\n" "frozen:\t%u\n", map->map_type, @@ -812,7 +811,7 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) map->max_entries, map->map_flags, (unsigned long long)map->map_extra, - bpf_map_memory_footprint(map), + bpf_map_memory_usage(map), map->id, READ_ONCE(map->frozen)); if (type) { -- GitLab From 41d5941e7f9a27f3d28220f08b843e8d87df1be4 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sun, 5 Mar 2023 12:45:59 +0000 Subject: [PATCH 0178/2078] bpf: lpm_trie memory usage trie_mem_usage() is introduced to calculate the lpm_trie memory usage. Some small memory allocations are ignored. The inner node is also ignored. The result as follows, - before 10: lpm_trie flags 0x1 key 8B value 8B max_entries 65536 memlock 1048576B - after 10: lpm_trie flags 0x1 key 8B value 8B max_entries 65536 memlock 2291536B Signed-off-by: Yafang Shao Link: https://lore.kernel.org/r/20230305124615.12358-3-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/lpm_trie.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index d833496e9e426..dc23f2ac9cde1 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -720,6 +720,16 @@ static int trie_check_btf(const struct bpf_map *map, -EINVAL : 0; } +static u64 trie_mem_usage(const struct bpf_map *map) +{ + struct lpm_trie *trie = container_of(map, struct lpm_trie, map); + u64 elem_size; + + elem_size = sizeof(struct lpm_trie_node) + trie->data_size + + trie->map.value_size; + return elem_size * READ_ONCE(trie->n_entries); +} + BTF_ID_LIST_SINGLE(trie_map_btf_ids, struct, lpm_trie) const struct bpf_map_ops trie_map_ops = { .map_meta_equal = bpf_map_meta_equal, @@ -733,5 +743,6 @@ const struct bpf_map_ops trie_map_ops = { .map_update_batch = generic_map_update_batch, .map_delete_batch = generic_map_delete_batch, .map_check_btf = trie_check_btf, + .map_mem_usage = trie_mem_usage, .map_btf_id = &trie_map_btf_ids[0], }; -- GitLab From 304849a27b341cf22d5c15096144cc8c1b69e0c0 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sun, 5 Mar 2023 12:46:00 +0000 Subject: [PATCH 0179/2078] bpf: hashtab memory usage htab_map_mem_usage() is introduced to calculate hashmap memory usage. In this helper, some small memory allocations are ignore, as their size is quite small compared with the total size. The inner_map_meta in hash_of_map is also ignored. 
The result for hashtab as follows, - before this change 1: hash name count_map flags 0x1 <<<< no prealloc, fully set key 16B value 24B max_entries 1048576 memlock 41943040B 2: hash name count_map flags 0x1 <<<< no prealloc, none set key 16B value 24B max_entries 1048576 memlock 41943040B 3: hash name count_map flags 0x0 <<<< prealloc key 16B value 24B max_entries 1048576 memlock 41943040B The memlock is always a fixed size whatever it is preallocated or not, and whatever the count of allocated elements is. - after this change 1: hash name count_map flags 0x1 <<<< non prealloc, fully set key 16B value 24B max_entries 1048576 memlock 117441536B 2: hash name count_map flags 0x1 <<<< non prealloc, non set key 16B value 24B max_entries 1048576 memlock 16778240B 3: hash name count_map flags 0x0 <<<< prealloc key 16B value 24B max_entries 1048576 memlock 109056000B The memlock now is hashtab actually allocated. The result for percpu hash map as follows, - before this change 4: percpu_hash name count_map flags 0x0 <<<< prealloc key 16B value 24B max_entries 1048576 memlock 822083584B 5: percpu_hash name count_map flags 0x1 <<<< no prealloc key 16B value 24B max_entries 1048576 memlock 822083584B - after this change 4: percpu_hash name count_map flags 0x0 key 16B value 24B max_entries 1048576 memlock 897582080B 5: percpu_hash name count_map flags 0x1 key 16B value 24B max_entries 1048576 memlock 922748736B At worst, the difference can be 10x, for example, - before this change 6: hash name count_map flags 0x0 key 4B value 4B max_entries 1048576 memlock 8388608B - after this change 6: hash name count_map flags 0x0 key 4B value 4B max_entries 1048576 memlock 83889408B Signed-off-by: Yafang Shao Acked-by: Hou Tao Link: https://lore.kernel.org/r/20230305124615.12358-4-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/hashtab.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 653aeb481c794..0df4b0c10f595 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -2190,6 +2190,44 @@ out: return num_elems; } +static u64 htab_map_mem_usage(const struct bpf_map *map) +{ + struct bpf_htab *htab = container_of(map, struct bpf_htab, map); + u32 value_size = round_up(htab->map.value_size, 8); + bool prealloc = htab_is_prealloc(htab); + bool percpu = htab_is_percpu(htab); + bool lru = htab_is_lru(htab); + u64 num_entries; + u64 usage = sizeof(struct bpf_htab); + + usage += sizeof(struct bucket) * htab->n_buckets; + usage += sizeof(int) * num_possible_cpus() * HASHTAB_MAP_LOCK_COUNT; + if (prealloc) { + num_entries = map->max_entries; + if (htab_has_extra_elems(htab)) + num_entries += num_possible_cpus(); + + usage += htab->elem_size * num_entries; + + if (percpu) + usage += value_size * num_possible_cpus() * num_entries; + else if (!lru) + usage += sizeof(struct htab_elem *) * num_possible_cpus(); + } else { +#define LLIST_NODE_SZ sizeof(struct llist_node) + + num_entries = htab->use_percpu_counter ? 
+ percpu_counter_sum(&htab->pcount) : + atomic_read(&htab->count); + usage += (htab->elem_size + LLIST_NODE_SZ) * num_entries; + if (percpu) { + usage += (LLIST_NODE_SZ + sizeof(void *)) * num_entries; + usage += value_size * num_possible_cpus() * num_entries; + } + } + return usage; +} + BTF_ID_LIST_SINGLE(htab_map_btf_ids, struct, bpf_htab) const struct bpf_map_ops htab_map_ops = { .map_meta_equal = bpf_map_meta_equal, @@ -2206,6 +2244,7 @@ const struct bpf_map_ops htab_map_ops = { .map_seq_show_elem = htab_map_seq_show_elem, .map_set_for_each_callback_args = map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_hash_elem, + .map_mem_usage = htab_map_mem_usage, BATCH_OPS(htab), .map_btf_id = &htab_map_btf_ids[0], .iter_seq_info = &iter_seq_info, @@ -2227,6 +2266,7 @@ const struct bpf_map_ops htab_lru_map_ops = { .map_seq_show_elem = htab_map_seq_show_elem, .map_set_for_each_callback_args = map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_hash_elem, + .map_mem_usage = htab_map_mem_usage, BATCH_OPS(htab_lru), .map_btf_id = &htab_map_btf_ids[0], .iter_seq_info = &iter_seq_info, @@ -2378,6 +2418,7 @@ const struct bpf_map_ops htab_percpu_map_ops = { .map_seq_show_elem = htab_percpu_map_seq_show_elem, .map_set_for_each_callback_args = map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_hash_elem, + .map_mem_usage = htab_map_mem_usage, BATCH_OPS(htab_percpu), .map_btf_id = &htab_map_btf_ids[0], .iter_seq_info = &iter_seq_info, @@ -2397,6 +2438,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = { .map_seq_show_elem = htab_percpu_map_seq_show_elem, .map_set_for_each_callback_args = map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_hash_elem, + .map_mem_usage = htab_map_mem_usage, BATCH_OPS(htab_lru_percpu), .map_btf_id = &htab_map_btf_ids[0], .iter_seq_info = &iter_seq_info, @@ -2534,6 +2576,7 @@ const struct bpf_map_ops htab_of_maps_map_ops = { .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem, .map_gen_lookup = htab_of_map_gen_lookup, .map_check_btf = map_check_no_btf, + .map_mem_usage = htab_map_mem_usage, BATCH_OPS(htab), .map_btf_id = &htab_map_btf_ids[0], }; -- GitLab From 1746d0555a8795c7ecfeab6a2c3e3c824cf57535 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sun, 5 Mar 2023 12:46:01 +0000 Subject: [PATCH 0180/2078] bpf: arraymap memory usage Introduce array_map_mem_usage() to calculate arraymap memory usage. In this helper, some small memory allocations are ignored, like the allocation of struct bpf_array_aux in prog_array. The inner_map_meta in array_of_map is also ignored. 
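To see where the numbers below come from, here is a small userspace sketch of the formula this patch adds. The constants sizeof(struct bpf_array) == 320 bytes and 32 possible CPUs are assumptions inferred to match the listed output, not values stated in the patch; the value size is rounded up to 8 bytes as the array map does internally.

/* Toy re-computation of array_map_mem_usage() for the example maps. */
#include <stdint.h>
#include <stdio.h>

#define TOY_PAGE_SIZE     4096ULL                    /* assumed page size */
#define TOY_PAGE_ALIGN(x) (((x) + TOY_PAGE_SIZE - 1) & ~(TOY_PAGE_SIZE - 1))

static uint64_t round_up8(uint64_t v) { return (v + 7) & ~7ULL; }

static uint64_t array_usage(uint64_t entries, uint64_t value_size,
                            int percpu, int mmapable, int ncpus)
{
        uint64_t elem = round_up8(value_size);
        uint64_t usage = 320;            /* assumed sizeof(struct bpf_array) */

        if (percpu)
                return usage + entries * 8 + entries * elem * ncpus;
        if (mmapable)
                return TOY_PAGE_ALIGN(usage) + TOY_PAGE_ALIGN(entries * elem);
        return usage + entries * elem;
}

int main(void)
{
        /* 65536 entries, 4-byte values, as in maps 11 and 12 below */
        printf("array:        %llu\n",
               (unsigned long long)array_usage(65536, 4, 0, 0, 32));  /* 524608 */
        printf("percpu_array: %llu\n",
               (unsigned long long)array_usage(65536, 4, 1, 0, 32));  /* 17301824 */
        return 0;
}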
The result as follows, - before 11: array name count_map flags 0x0 key 4B value 4B max_entries 65536 memlock 524288B 12: percpu_array name count_map flags 0x0 key 4B value 4B max_entries 65536 memlock 8912896B 13: perf_event_array name count_map flags 0x0 key 4B value 4B max_entries 65536 memlock 524288B 14: prog_array name count_map flags 0x0 key 4B value 4B max_entries 65536 memlock 524288B 15: cgroup_array name count_map flags 0x0 key 4B value 4B max_entries 65536 memlock 524288B - after 11: array name count_map flags 0x0 key 4B value 4B max_entries 65536 memlock 524608B 12: percpu_array name count_map flags 0x0 key 4B value 4B max_entries 65536 memlock 17301824B 13: perf_event_array name count_map flags 0x0 key 4B value 4B max_entries 65536 memlock 524608B 14: prog_array name count_map flags 0x0 key 4B value 4B max_entries 65536 memlock 524608B 15: cgroup_array name count_map flags 0x0 key 4B value 4B max_entries 65536 memlock 524608B Signed-off-by: Yafang Shao Link: https://lore.kernel.org/r/20230305124615.12358-5-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/arraymap.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 4847069595569..1588c793a715c 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -721,6 +721,28 @@ static int bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback_ return num_elems; } +static u64 array_map_mem_usage(const struct bpf_map *map) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + bool percpu = map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; + u32 elem_size = array->elem_size; + u64 entries = map->max_entries; + u64 usage = sizeof(*array); + + if (percpu) { + usage += entries * sizeof(void *); + usage += entries * elem_size * num_possible_cpus(); + } else { + if (map->map_flags & BPF_F_MMAPABLE) { + usage = PAGE_ALIGN(usage); + usage += PAGE_ALIGN(entries * elem_size); + } else { + usage += entries * elem_size; + } + } + return usage; +} + BTF_ID_LIST_SINGLE(array_map_btf_ids, struct, bpf_array) const struct bpf_map_ops array_map_ops = { .map_meta_equal = array_map_meta_equal, @@ -742,6 +764,7 @@ const struct bpf_map_ops array_map_ops = { .map_update_batch = generic_map_update_batch, .map_set_for_each_callback_args = map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_array_elem, + .map_mem_usage = array_map_mem_usage, .map_btf_id = &array_map_btf_ids[0], .iter_seq_info = &iter_seq_info, }; @@ -762,6 +785,7 @@ const struct bpf_map_ops percpu_array_map_ops = { .map_update_batch = generic_map_update_batch, .map_set_for_each_callback_args = map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_array_elem, + .map_mem_usage = array_map_mem_usage, .map_btf_id = &array_map_btf_ids[0], .iter_seq_info = &iter_seq_info, }; @@ -1156,6 +1180,7 @@ const struct bpf_map_ops prog_array_map_ops = { .map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem, .map_release_uref = prog_array_map_clear, .map_seq_show_elem = prog_array_map_seq_show_elem, + .map_mem_usage = array_map_mem_usage, .map_btf_id = &array_map_btf_ids[0], }; @@ -1257,6 +1282,7 @@ const struct bpf_map_ops perf_event_array_map_ops = { .map_fd_put_ptr = perf_event_fd_array_put_ptr, .map_release = perf_event_fd_array_release, .map_check_btf = map_check_no_btf, + .map_mem_usage = array_map_mem_usage, .map_btf_id = &array_map_btf_ids[0], }; @@ -1291,6 +1317,7 @@ const struct bpf_map_ops cgroup_array_map_ops = { .map_fd_get_ptr = 
cgroup_fd_array_get_ptr, .map_fd_put_ptr = cgroup_fd_array_put_ptr, .map_check_btf = map_check_no_btf, + .map_mem_usage = array_map_mem_usage, .map_btf_id = &array_map_btf_ids[0], }; #endif @@ -1379,5 +1406,6 @@ const struct bpf_map_ops array_of_maps_map_ops = { .map_lookup_batch = generic_map_lookup_batch, .map_update_batch = generic_map_update_batch, .map_check_btf = map_check_no_btf, + .map_mem_usage = array_map_mem_usage, .map_btf_id = &array_map_btf_ids[0], }; -- GitLab From cbb9b6068c68332eefb0ef4c0ebf6f96c35d9bde Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sun, 5 Mar 2023 12:46:02 +0000 Subject: [PATCH 0181/2078] bpf: stackmap memory usage A new helper is introduced to get stackmap memory usage. Some small memory allocations are ignored as their memory size is quite small compared to the totol usage. The result as follows, - before 16: stack_trace name count_map flags 0x0 key 4B value 8B max_entries 65536 memlock 1048576B - after 16: stack_trace name count_map flags 0x0 key 4B value 8B max_entries 65536 memlock 2097472B Signed-off-by: Yafang Shao Link: https://lore.kernel.org/r/20230305124615.12358-6-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/stackmap.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index aecea7451b610..0f1d8dced9337 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -654,6 +654,19 @@ static void stack_map_free(struct bpf_map *map) put_callchain_buffers(); } +static u64 stack_map_mem_usage(const struct bpf_map *map) +{ + struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); + u64 value_size = map->value_size; + u64 n_buckets = smap->n_buckets; + u64 enties = map->max_entries; + u64 usage = sizeof(*smap); + + usage += n_buckets * sizeof(struct stack_map_bucket *); + usage += enties * (sizeof(struct stack_map_bucket) + value_size); + return usage; +} + BTF_ID_LIST_SINGLE(stack_trace_map_btf_ids, struct, bpf_stack_map) const struct bpf_map_ops stack_trace_map_ops = { .map_meta_equal = bpf_map_meta_equal, @@ -664,5 +677,6 @@ const struct bpf_map_ops stack_trace_map_ops = { .map_update_elem = stack_map_update_elem, .map_delete_elem = stack_map_delete_elem, .map_check_btf = map_check_no_btf, + .map_mem_usage = stack_map_mem_usage, .map_btf_id = &stack_trace_map_btf_ids[0], }; -- GitLab From 2e89caf055a64cb531cbd8ab7a01c3e9b0b3133d Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sun, 5 Mar 2023 12:46:03 +0000 Subject: [PATCH 0182/2078] bpf: reuseport_array memory usage A new helper is introduced to calculate reuseport_array memory usage. 
The result as follows, - before 14: reuseport_sockarray name count_map flags 0x0 key 4B value 8B max_entries 65536 memlock 1048576B - after 14: reuseport_sockarray name count_map flags 0x0 key 4B value 8B max_entries 65536 memlock 524544B Signed-off-by: Yafang Shao Link: https://lore.kernel.org/r/20230305124615.12358-7-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/reuseport_array.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c index 82c61612f382a..71cb72f5b7336 100644 --- a/kernel/bpf/reuseport_array.c +++ b/kernel/bpf/reuseport_array.c @@ -335,6 +335,13 @@ static int reuseport_array_get_next_key(struct bpf_map *map, void *key, return 0; } +static u64 reuseport_array_mem_usage(const struct bpf_map *map) +{ + struct reuseport_array *array; + + return struct_size(array, ptrs, map->max_entries); +} + BTF_ID_LIST_SINGLE(reuseport_array_map_btf_ids, struct, reuseport_array) const struct bpf_map_ops reuseport_array_ops = { .map_meta_equal = bpf_map_meta_equal, @@ -344,5 +351,6 @@ const struct bpf_map_ops reuseport_array_ops = { .map_lookup_elem = reuseport_array_lookup_elem, .map_get_next_key = reuseport_array_get_next_key, .map_delete_elem = reuseport_array_delete_elem, + .map_mem_usage = reuseport_array_mem_usage, .map_btf_id = &reuseport_array_map_btf_ids[0], }; -- GitLab From 2f7e4ab2caa9a692ffc843f801cea90632cdc782 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sun, 5 Mar 2023 12:46:04 +0000 Subject: [PATCH 0183/2078] bpf: ringbuf memory usage A new helper ringbuf_map_mem_usage() is introduced to calculate ringbuf memory usage. The result as follows, - before 15: ringbuf name count_map flags 0x0 key 0B value 0B max_entries 65536 memlock 0B - after 15: ringbuf name count_map flags 0x0 key 0B value 0B max_entries 65536 memlock 78424B Signed-off-by: Yafang Shao Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230305124615.12358-8-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/ringbuf.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index 8732e0aadf366..0d2a45ff83f15 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -19,6 +19,7 @@ (offsetof(struct bpf_ringbuf, consumer_pos) >> PAGE_SHIFT) /* consumer page and producer page */ #define RINGBUF_POS_PAGES 2 +#define RINGBUF_NR_META_PAGES (RINGBUF_PGOFF + RINGBUF_POS_PAGES) #define RINGBUF_MAX_RECORD_SZ (UINT_MAX/4) @@ -96,7 +97,7 @@ static struct bpf_ringbuf *bpf_ringbuf_area_alloc(size_t data_sz, int numa_node) { const gfp_t flags = GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL | __GFP_NOWARN | __GFP_ZERO; - int nr_meta_pages = RINGBUF_PGOFF + RINGBUF_POS_PAGES; + int nr_meta_pages = RINGBUF_NR_META_PAGES; int nr_data_pages = data_sz >> PAGE_SHIFT; int nr_pages = nr_meta_pages + nr_data_pages; struct page **pages, *page; @@ -336,6 +337,21 @@ static __poll_t ringbuf_map_poll_user(struct bpf_map *map, struct file *filp, return 0; } +static u64 ringbuf_map_mem_usage(const struct bpf_map *map) +{ + struct bpf_ringbuf *rb; + int nr_data_pages; + int nr_meta_pages; + u64 usage = sizeof(struct bpf_ringbuf_map); + + rb = container_of(map, struct bpf_ringbuf_map, map)->rb; + usage += (u64)rb->nr_pages << PAGE_SHIFT; + nr_meta_pages = RINGBUF_NR_META_PAGES; + nr_data_pages = map->max_entries >> PAGE_SHIFT; + usage += (nr_meta_pages + 2 * nr_data_pages) * sizeof(struct page *); + return usage; +} + BTF_ID_LIST_SINGLE(ringbuf_map_btf_ids, 
struct, bpf_ringbuf_map) const struct bpf_map_ops ringbuf_map_ops = { .map_meta_equal = bpf_map_meta_equal, @@ -347,6 +363,7 @@ const struct bpf_map_ops ringbuf_map_ops = { .map_update_elem = ringbuf_map_update_elem, .map_delete_elem = ringbuf_map_delete_elem, .map_get_next_key = ringbuf_map_get_next_key, + .map_mem_usage = ringbuf_map_mem_usage, .map_btf_id = &ringbuf_map_btf_ids[0], }; @@ -361,6 +378,7 @@ const struct bpf_map_ops user_ringbuf_map_ops = { .map_update_elem = ringbuf_map_update_elem, .map_delete_elem = ringbuf_map_delete_elem, .map_get_next_key = ringbuf_map_get_next_key, + .map_mem_usage = ringbuf_map_mem_usage, .map_btf_id = &user_ringbuf_map_btf_ids[0], }; -- GitLab From 71a49abe73cb56b15bf0701b5e98b6103480f762 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sun, 5 Mar 2023 12:46:05 +0000 Subject: [PATCH 0184/2078] bpf: bloom_filter memory usage Introduce a new helper to calculate the bloom_filter memory usage. The result as follows, - before 16: bloom_filter flags 0x0 key 0B value 8B max_entries 65536 memlock 524288B - after 16: bloom_filter flags 0x0 key 0B value 8B max_entries 65536 memlock 65856B Signed-off-by: Yafang Shao Link: https://lore.kernel.org/r/20230305124615.12358-9-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/bloom_filter.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c index 48ee750849f25..6350c5d35a9be 100644 --- a/kernel/bpf/bloom_filter.c +++ b/kernel/bpf/bloom_filter.c @@ -193,6 +193,17 @@ static int bloom_map_check_btf(const struct bpf_map *map, return btf_type_is_void(key_type) ? 0 : -EINVAL; } +static u64 bloom_map_mem_usage(const struct bpf_map *map) +{ + struct bpf_bloom_filter *bloom; + u64 bitset_bytes; + + bloom = container_of(map, struct bpf_bloom_filter, map); + bitset_bytes = BITS_TO_BYTES((u64)bloom->bitset_mask + 1); + bitset_bytes = roundup(bitset_bytes, sizeof(unsigned long)); + return sizeof(*bloom) + bitset_bytes; +} + BTF_ID_LIST_SINGLE(bpf_bloom_map_btf_ids, struct, bpf_bloom_filter) const struct bpf_map_ops bloom_filter_map_ops = { .map_meta_equal = bpf_map_meta_equal, @@ -206,5 +217,6 @@ const struct bpf_map_ops bloom_filter_map_ops = { .map_update_elem = bloom_map_update_elem, .map_delete_elem = bloom_map_delete_elem, .map_check_btf = bloom_map_check_btf, + .map_mem_usage = bloom_map_mem_usage, .map_btf_id = &bpf_bloom_map_btf_ids[0], }; -- GitLab From 835f1fca951303930b2c74c5b0abe3c03a3aeadf Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sun, 5 Mar 2023 12:46:06 +0000 Subject: [PATCH 0185/2078] bpf: cpumap memory usage A new helper is introduced to calculate cpumap memory usage. The size of cpu_entries can be dynamically changed when we update or delete a cpumap element, but this patch doesn't include the memory size of cpu_entry yet. We can dynamically calculate the memory usage when we alloc or free a cpu_entry, but it will take extra runtime overhead, so let just put it aside currently. Note that the size of different cpu_entry may be different as well. 
The result as follows, - before 48: cpumap name count_map flags 0x4 key 4B value 4B max_entries 64 memlock 4096B - after 48: cpumap name count_map flags 0x4 key 4B value 4B max_entries 64 memlock 832B Signed-off-by: Yafang Shao Link: https://lore.kernel.org/r/20230305124615.12358-10-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/cpumap.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index d2110c1f6fa64..871809e71b4e2 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -673,6 +673,15 @@ static int cpu_map_redirect(struct bpf_map *map, u64 index, u64 flags) __cpu_map_lookup_elem); } +static u64 cpu_map_mem_usage(const struct bpf_map *map) +{ + u64 usage = sizeof(struct bpf_cpu_map); + + /* Currently the dynamically allocated elements are not counted */ + usage += (u64)map->max_entries * sizeof(struct bpf_cpu_map_entry *); + return usage; +} + BTF_ID_LIST_SINGLE(cpu_map_btf_ids, struct, bpf_cpu_map) const struct bpf_map_ops cpu_map_ops = { .map_meta_equal = bpf_map_meta_equal, @@ -683,6 +692,7 @@ const struct bpf_map_ops cpu_map_ops = { .map_lookup_elem = cpu_map_lookup_elem, .map_get_next_key = cpu_map_get_next_key, .map_check_btf = map_check_no_btf, + .map_mem_usage = cpu_map_mem_usage, .map_btf_id = &cpu_map_btf_ids[0], .map_redirect = cpu_map_redirect, }; -- GitLab From fa5e83df173beb6c1334737a463fc2b4ef4efa8b Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sun, 5 Mar 2023 12:46:07 +0000 Subject: [PATCH 0186/2078] bpf: devmap memory usage A new helper is introduced to calculate the memory usage of devmap and devmap_hash. The number of dynamically allocated elements are recored for devmap_hash already, but not for devmap. To track the memory size of dynamically allocated elements, this patch also count the numbers for devmap. The result as follows, - before 40: devmap name count_map flags 0x80 key 4B value 4B max_entries 65536 memlock 524288B 41: devmap_hash name count_map flags 0x80 key 4B value 4B max_entries 65536 memlock 524288B - after 40: devmap name count_map flags 0x80 <<<< no elements key 4B value 4B max_entries 65536 memlock 524608B 41: devmap_hash name count_map flags 0x80 <<<< no elements key 4B value 4B max_entries 65536 memlock 524608B Note that the number of buckets is same with max_entries for devmap_hash in this case. 
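The counting pattern itself is easy to see in isolation. Below is a standalone C sketch of the idea: bump an atomic counter when an element is installed, drop it when the element is freed, and fold the count into the usage report. The names and byte sizes are illustrative stand-ins, not the kernel's devmap structures.

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

struct toy_map {
        atomic_int items;          /* live elements, like dtab->items */
        size_t fixed_bytes;        /* struct + slot array, always allocated */
        size_t elem_bytes;         /* per dynamically allocated element */
};

static void toy_update(struct toy_map *m) { atomic_fetch_add(&m->items, 1); }
static void toy_delete(struct toy_map *m) { atomic_fetch_sub(&m->items, 1); }

static size_t toy_mem_usage(struct toy_map *m)
{
        return m->fixed_bytes + (size_t)atomic_load(&m->items) * m->elem_bytes;
}

int main(void)
{
        struct toy_map m = {
                .fixed_bytes = 320 + 65536 * sizeof(void *), /* arbitrary stand-ins */
                .elem_bytes  = 120,
        };

        printf("empty map:        %zu bytes\n", toy_mem_usage(&m));
        toy_update(&m);
        toy_update(&m);
        toy_delete(&m);
        printf("one live element: %zu bytes\n", toy_mem_usage(&m));
        return 0;
}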
Signed-off-by: Yafang Shao Link: https://lore.kernel.org/r/20230305124615.12358-11-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/devmap.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 2675fefc6cb62..19b036a228f78 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -819,8 +819,10 @@ static int dev_map_delete_elem(struct bpf_map *map, void *key) return -EINVAL; old_dev = unrcu_pointer(xchg(&dtab->netdev_map[k], NULL)); - if (old_dev) + if (old_dev) { call_rcu(&old_dev->rcu, __dev_map_entry_free); + atomic_dec((atomic_t *)&dtab->items); + } return 0; } @@ -931,6 +933,8 @@ static int __dev_map_update_elem(struct net *net, struct bpf_map *map, old_dev = unrcu_pointer(xchg(&dtab->netdev_map[i], RCU_INITIALIZER(dev))); if (old_dev) call_rcu(&old_dev->rcu, __dev_map_entry_free); + else + atomic_inc((atomic_t *)&dtab->items); return 0; } @@ -1016,6 +1020,20 @@ static int dev_hash_map_redirect(struct bpf_map *map, u64 ifindex, u64 flags) __dev_map_hash_lookup_elem); } +static u64 dev_map_mem_usage(const struct bpf_map *map) +{ + struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); + u64 usage = sizeof(struct bpf_dtab); + + if (map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) + usage += (u64)dtab->n_buckets * sizeof(struct hlist_head); + else + usage += (u64)map->max_entries * sizeof(struct bpf_dtab_netdev *); + usage += atomic_read((atomic_t *)&dtab->items) * + (u64)sizeof(struct bpf_dtab_netdev); + return usage; +} + BTF_ID_LIST_SINGLE(dev_map_btf_ids, struct, bpf_dtab) const struct bpf_map_ops dev_map_ops = { .map_meta_equal = bpf_map_meta_equal, @@ -1026,6 +1044,7 @@ const struct bpf_map_ops dev_map_ops = { .map_update_elem = dev_map_update_elem, .map_delete_elem = dev_map_delete_elem, .map_check_btf = map_check_no_btf, + .map_mem_usage = dev_map_mem_usage, .map_btf_id = &dev_map_btf_ids[0], .map_redirect = dev_map_redirect, }; @@ -1039,6 +1058,7 @@ const struct bpf_map_ops dev_map_hash_ops = { .map_update_elem = dev_map_hash_update_elem, .map_delete_elem = dev_map_hash_delete_elem, .map_check_btf = map_check_no_btf, + .map_mem_usage = dev_map_mem_usage, .map_btf_id = &dev_map_btf_ids[0], .map_redirect = dev_hash_map_redirect, }; @@ -1109,9 +1129,11 @@ static int dev_map_notification(struct notifier_block *notifier, if (!dev || netdev != dev->dev) continue; odev = unrcu_pointer(cmpxchg(&dtab->netdev_map[i], RCU_INITIALIZER(dev), NULL)); - if (dev == odev) + if (dev == odev) { call_rcu(&dev->rcu, __dev_map_entry_free); + atomic_dec((atomic_t *)&dtab->items); + } } } rcu_read_unlock(); -- GitLab From c6e66b42a348125fd41131de75f84fa67b6579ce Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sun, 5 Mar 2023 12:46:08 +0000 Subject: [PATCH 0187/2078] bpf: queue_stack_maps memory usage A new helper is introduced to calculate queue_stack_maps memory usage. 
The result as follows, - before 20: queue name count_map flags 0x0 key 0B value 4B max_entries 65536 memlock 266240B 21: stack name count_map flags 0x0 key 0B value 4B max_entries 65536 memlock 266240B - after 20: queue name count_map flags 0x0 key 0B value 4B max_entries 65536 memlock 524288B 21: stack name count_map flags 0x0 key 0B value 4B max_entries 65536 memlock 524288B Signed-off-by: Yafang Shao Link: https://lore.kernel.org/r/20230305124615.12358-12-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/queue_stack_maps.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c index 8a5e060de63bc..63ecbbcb349dc 100644 --- a/kernel/bpf/queue_stack_maps.c +++ b/kernel/bpf/queue_stack_maps.c @@ -246,6 +246,14 @@ static int queue_stack_map_get_next_key(struct bpf_map *map, void *key, return -EINVAL; } +static u64 queue_stack_map_mem_usage(const struct bpf_map *map) +{ + u64 usage = sizeof(struct bpf_queue_stack); + + usage += ((u64)map->max_entries + 1) * map->value_size; + return usage; +} + BTF_ID_LIST_SINGLE(queue_map_btf_ids, struct, bpf_queue_stack) const struct bpf_map_ops queue_map_ops = { .map_meta_equal = bpf_map_meta_equal, @@ -259,6 +267,7 @@ const struct bpf_map_ops queue_map_ops = { .map_pop_elem = queue_map_pop_elem, .map_peek_elem = queue_map_peek_elem, .map_get_next_key = queue_stack_map_get_next_key, + .map_mem_usage = queue_stack_map_mem_usage, .map_btf_id = &queue_map_btf_ids[0], }; @@ -274,5 +283,6 @@ const struct bpf_map_ops stack_map_ops = { .map_pop_elem = stack_map_pop_elem, .map_peek_elem = stack_map_peek_elem, .map_get_next_key = queue_stack_map_get_next_key, + .map_mem_usage = queue_stack_map_mem_usage, .map_btf_id = &queue_map_btf_ids[0], }; -- GitLab From f062226d8d59b521ddc946ad791048188a16722a Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sun, 5 Mar 2023 12:46:09 +0000 Subject: [PATCH 0188/2078] bpf: bpf_struct_ops memory usage A new helper is introduced to calculate bpf_struct_ops memory usage. 
The result as follows, - before 1: struct_ops name count_map flags 0x0 key 4B value 256B max_entries 1 memlock 4096B btf_id 73 - after 1: struct_ops name count_map flags 0x0 key 4B value 256B max_entries 1 memlock 5016B btf_id 73 Signed-off-by: Yafang Shao Link: https://lore.kernel.org/r/20230305124615.12358-13-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_struct_ops.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index ece9870cab68e..38903fb52f98b 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -641,6 +641,21 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) return map; } +static u64 bpf_struct_ops_map_mem_usage(const struct bpf_map *map) +{ + struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map; + const struct bpf_struct_ops *st_ops = st_map->st_ops; + const struct btf_type *vt = st_ops->value_type; + u64 usage; + + usage = sizeof(*st_map) + + vt->size - sizeof(struct bpf_struct_ops_value); + usage += vt->size; + usage += btf_type_vlen(vt) * sizeof(struct bpf_links *); + usage += PAGE_SIZE; + return usage; +} + BTF_ID_LIST_SINGLE(bpf_struct_ops_map_btf_ids, struct, bpf_struct_ops_map) const struct bpf_map_ops bpf_struct_ops_map_ops = { .map_alloc_check = bpf_struct_ops_map_alloc_check, @@ -651,6 +666,7 @@ const struct bpf_map_ops bpf_struct_ops_map_ops = { .map_delete_elem = bpf_struct_ops_map_delete_elem, .map_update_elem = bpf_struct_ops_map_update_elem, .map_seq_show_elem = bpf_struct_ops_map_seq_show_elem, + .map_mem_usage = bpf_struct_ops_map_mem_usage, .map_btf_id = &bpf_struct_ops_map_btf_ids[0], }; -- GitLab From 2f536977d6f1481f0a149140c8311103ddebe36a Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sun, 5 Mar 2023 12:46:10 +0000 Subject: [PATCH 0189/2078] bpf: local_storage memory usage A new helper is introduced to calculate local_storage map memory usage. Currently the dynamically allocated elements are not counted, since it will take runtime overhead in the element update or delete path. So let's put it aside currently, and implement it in the future if the user really needs it. Signed-off-by: Yafang Shao Link: https://lore.kernel.org/r/20230305124615.12358-14-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/local_storage.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index e90d9f63edc5d..a993560f200ac 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -446,6 +446,12 @@ static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *key, rcu_read_unlock(); } +static u64 cgroup_storage_map_usage(const struct bpf_map *map) +{ + /* Currently the dynamically allocated elements are not counted. 
*/ + return sizeof(struct bpf_cgroup_storage_map); +} + BTF_ID_LIST_SINGLE(cgroup_storage_map_btf_ids, struct, bpf_cgroup_storage_map) const struct bpf_map_ops cgroup_storage_map_ops = { @@ -457,6 +463,7 @@ const struct bpf_map_ops cgroup_storage_map_ops = { .map_delete_elem = cgroup_storage_delete_elem, .map_check_btf = cgroup_storage_check_btf, .map_seq_show_elem = cgroup_storage_seq_show_elem, + .map_mem_usage = cgroup_storage_map_usage, .map_btf_id = &cgroup_storage_map_btf_ids[0], }; -- GitLab From 7490b7f1c02ef825ef98f7230662049d4a464a21 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sun, 5 Mar 2023 12:46:11 +0000 Subject: [PATCH 0190/2078] bpf, net: bpf_local_storage memory usage A new helper is introduced into bpf_local_storage map to calculate the memory usage. This helper is also used by other maps like bpf_cgrp_storage, bpf_inode_storage, bpf_task_storage and etc. Note that currently the dynamically allocated storage elements are not counted in the usage, since it will take extra runtime overhead in the elements update or delete path. So let's put it aside now, and implement it in the future when someone really need it. Signed-off-by: Yafang Shao Link: https://lore.kernel.org/r/20230305124615.12358-15-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_local_storage.h | 1 + kernel/bpf/bpf_cgrp_storage.c | 1 + kernel/bpf/bpf_inode_storage.c | 1 + kernel/bpf/bpf_local_storage.c | 10 ++++++++++ kernel/bpf/bpf_task_storage.c | 1 + net/core/bpf_sk_storage.c | 1 + 6 files changed, 15 insertions(+) diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 6d37a40cd90e8..d934248b8e813 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -164,5 +164,6 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, void *value, u64 map_flags, gfp_t gfp_flags); void bpf_local_storage_free_rcu(struct rcu_head *rcu); +u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map); #endif /* _BPF_LOCAL_STORAGE_H */ diff --git a/kernel/bpf/bpf_cgrp_storage.c b/kernel/bpf/bpf_cgrp_storage.c index 6cdf6d9ed91df..9ae07aedaf233 100644 --- a/kernel/bpf/bpf_cgrp_storage.c +++ b/kernel/bpf/bpf_cgrp_storage.c @@ -221,6 +221,7 @@ const struct bpf_map_ops cgrp_storage_map_ops = { .map_update_elem = bpf_cgrp_storage_update_elem, .map_delete_elem = bpf_cgrp_storage_delete_elem, .map_check_btf = bpf_local_storage_map_check_btf, + .map_mem_usage = bpf_local_storage_map_mem_usage, .map_btf_id = &bpf_local_storage_map_btf_id[0], .map_owner_storage_ptr = cgroup_storage_ptr, }; diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c index 05f4c66c9089f..43e2619c8167d 100644 --- a/kernel/bpf/bpf_inode_storage.c +++ b/kernel/bpf/bpf_inode_storage.c @@ -223,6 +223,7 @@ const struct bpf_map_ops inode_storage_map_ops = { .map_update_elem = bpf_fd_inode_storage_update_elem, .map_delete_elem = bpf_fd_inode_storage_delete_elem, .map_check_btf = bpf_local_storage_map_check_btf, + .map_mem_usage = bpf_local_storage_map_mem_usage, .map_btf_id = &bpf_local_storage_map_btf_id[0], .map_owner_storage_ptr = inode_storage_ptr, }; diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 3d320393a12c9..d3ba3f2db6405 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -685,6 +685,16 @@ bool bpf_local_storage_unlink_nolock(struct bpf_local_storage *local_storage) return free_storage; } +u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map) 
+{ + struct bpf_local_storage_map *smap = (struct bpf_local_storage_map *)map; + u64 usage = sizeof(*smap); + + /* The dynamically callocated selems are not counted currently. */ + usage += sizeof(*smap->buckets) * (1ULL << smap->bucket_log); + return usage; +} + struct bpf_map * bpf_local_storage_map_alloc(union bpf_attr *attr, struct bpf_local_storage_cache *cache) diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c index 1e486055a523d..20f942229f3c9 100644 --- a/kernel/bpf/bpf_task_storage.c +++ b/kernel/bpf/bpf_task_storage.c @@ -335,6 +335,7 @@ const struct bpf_map_ops task_storage_map_ops = { .map_update_elem = bpf_pid_task_storage_update_elem, .map_delete_elem = bpf_pid_task_storage_delete_elem, .map_check_btf = bpf_local_storage_map_check_btf, + .map_mem_usage = bpf_local_storage_map_mem_usage, .map_btf_id = &bpf_local_storage_map_btf_id[0], .map_owner_storage_ptr = task_storage_ptr, }; diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index bb378c33f542c..7a36353dbc22c 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -324,6 +324,7 @@ const struct bpf_map_ops sk_storage_map_ops = { .map_local_storage_charge = bpf_sk_storage_charge, .map_local_storage_uncharge = bpf_sk_storage_uncharge, .map_owner_storage_ptr = bpf_sk_storage_ptr, + .map_mem_usage = bpf_local_storage_map_mem_usage, }; const struct bpf_func_proto bpf_sk_storage_get_proto = { -- GitLab From 73d2c61919e9aeffad1b826ec23b1a4a07c1e0dd Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sun, 5 Mar 2023 12:46:12 +0000 Subject: [PATCH 0191/2078] bpf, net: sock_map memory usage sockmap and sockhash don't have something in common in allocation, so let's introduce different helpers to calculate their memory usage. The reuslt as follows, - before 28: sockmap name count_map flags 0x0 key 4B value 4B max_entries 65536 memlock 524288B 29: sockhash name count_map flags 0x0 key 4B value 4B max_entries 65536 memlock 524288B - after 28: sockmap name count_map flags 0x0 key 4B value 4B max_entries 65536 memlock 524608B 29: sockhash name count_map flags 0x0 <<<< no updated elements key 4B value 4B max_entries 65536 memlock 1048896B Signed-off-by: Yafang Shao Link: https://lore.kernel.org/r/20230305124615.12358-16-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- net/core/sock_map.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/net/core/sock_map.c b/net/core/sock_map.c index a68a7290a3b2b..9b854e236d236 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -797,6 +797,14 @@ static void sock_map_fini_seq_private(void *priv_data) bpf_map_put_with_uref(info->map); } +static u64 sock_map_mem_usage(const struct bpf_map *map) +{ + u64 usage = sizeof(struct bpf_stab); + + usage += (u64)map->max_entries * sizeof(struct sock *); + return usage; +} + static const struct bpf_iter_seq_info sock_map_iter_seq_info = { .seq_ops = &sock_map_seq_ops, .init_seq_private = sock_map_init_seq_private, @@ -816,6 +824,7 @@ const struct bpf_map_ops sock_map_ops = { .map_lookup_elem = sock_map_lookup, .map_release_uref = sock_map_release_progs, .map_check_btf = map_check_no_btf, + .map_mem_usage = sock_map_mem_usage, .map_btf_id = &sock_map_btf_ids[0], .iter_seq_info = &sock_map_iter_seq_info, }; @@ -1397,6 +1406,16 @@ static void sock_hash_fini_seq_private(void *priv_data) bpf_map_put_with_uref(info->map); } +static u64 sock_hash_mem_usage(const struct bpf_map *map) +{ + struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map); + u64 usage = 
sizeof(*htab); + + usage += htab->buckets_num * sizeof(struct bpf_shtab_bucket); + usage += atomic_read(&htab->count) * (u64)htab->elem_size; + return usage; +} + static const struct bpf_iter_seq_info sock_hash_iter_seq_info = { .seq_ops = &sock_hash_seq_ops, .init_seq_private = sock_hash_init_seq_private, @@ -1416,6 +1435,7 @@ const struct bpf_map_ops sock_hash_ops = { .map_lookup_elem_sys_only = sock_hash_lookup_sys, .map_release_uref = sock_hash_release_progs, .map_check_btf = map_check_no_btf, + .map_mem_usage = sock_hash_mem_usage, .map_btf_id = &sock_hash_map_btf_ids[0], .iter_seq_info = &sock_hash_iter_seq_info, }; -- GitLab From b4fd0d672bca001632d7291b5b162b08e065b815 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sun, 5 Mar 2023 12:46:13 +0000 Subject: [PATCH 0192/2078] bpf, net: xskmap memory usage A new helper is introduced to calculate xskmap memory usage. The xfsmap memory usage can be dynamically changed when we add or remove a xsk_map_node. Hence we need to track the count of xsk_map_node to get its memory usage. The result as follows, - before 10: xskmap name count_map flags 0x0 key 4B value 4B max_entries 65536 memlock 524288B - after 10: xskmap name count_map flags 0x0 <<< no elements case key 4B value 4B max_entries 65536 memlock 524608B Signed-off-by: Yafang Shao Link: https://lore.kernel.org/r/20230305124615.12358-17-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- include/net/xdp_sock.h | 1 + net/xdp/xskmap.c | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 3057e1a4a11c6..e96a1151ec759 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -38,6 +38,7 @@ struct xdp_umem { struct xsk_map { struct bpf_map map; spinlock_t lock; /* Synchronize map updates */ + atomic_t count; struct xdp_sock __rcu *xsk_map[]; }; diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c index 771d0fa90ef58..0c38d71759228 100644 --- a/net/xdp/xskmap.c +++ b/net/xdp/xskmap.c @@ -24,6 +24,7 @@ static struct xsk_map_node *xsk_map_node_alloc(struct xsk_map *map, return ERR_PTR(-ENOMEM); bpf_map_inc(&map->map); + atomic_inc(&map->count); node->map = map; node->map_entry = map_entry; @@ -32,8 +33,11 @@ static struct xsk_map_node *xsk_map_node_alloc(struct xsk_map *map, static void xsk_map_node_free(struct xsk_map_node *node) { + struct xsk_map *map = node->map; + bpf_map_put(&node->map->map); kfree(node); + atomic_dec(&map->count); } static void xsk_map_sock_add(struct xdp_sock *xs, struct xsk_map_node *node) @@ -85,6 +89,14 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr) return &m->map; } +static u64 xsk_map_mem_usage(const struct bpf_map *map) +{ + struct xsk_map *m = container_of(map, struct xsk_map, map); + + return struct_size(m, xsk_map, map->max_entries) + + (u64)atomic_read(&m->count) * sizeof(struct xsk_map_node); +} + static void xsk_map_free(struct bpf_map *map) { struct xsk_map *m = container_of(map, struct xsk_map, map); @@ -267,6 +279,7 @@ const struct bpf_map_ops xsk_map_ops = { .map_update_elem = xsk_map_update_elem, .map_delete_elem = xsk_map_delete_elem, .map_check_btf = map_check_no_btf, + .map_mem_usage = xsk_map_mem_usage, .map_btf_id = &xsk_map_btf_ids[0], .map_redirect = xsk_map_redirect, }; -- GitLab From 9629363cd05642fe43aded44938adec067ad1da3 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sun, 5 Mar 2023 12:46:14 +0000 Subject: [PATCH 0193/2078] bpf: offload map memory usage A new helper is introduced to calculate offload map memory usage. 
But currently the memory dynamically allocated in netdev dev_ops, like nsim_map_update_elem, is not counted. Let's just put it aside now. Signed-off-by: Yafang Shao Link: https://lore.kernel.org/r/20230305124615.12358-18-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 6 ++++++ kernel/bpf/offload.c | 6 ++++++ kernel/bpf/syscall.c | 1 + 3 files changed, 13 insertions(+) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 9059520bbb5e0..6792a7940e1e6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2624,6 +2624,7 @@ static inline bool bpf_map_is_offloaded(struct bpf_map *map) struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr); void bpf_map_offload_map_free(struct bpf_map *map); +u64 bpf_map_offload_map_mem_usage(const struct bpf_map *map); int bpf_prog_test_run_syscall(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); @@ -2695,6 +2696,11 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map) { } +static inline u64 bpf_map_offload_map_mem_usage(const struct bpf_map *map) +{ + return 0; +} + static inline int bpf_prog_test_run_syscall(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 0c85e06f7ea7f..d9c9f45e3529c 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -563,6 +563,12 @@ void bpf_map_offload_map_free(struct bpf_map *map) bpf_map_area_free(offmap); } +u64 bpf_map_offload_map_mem_usage(const struct bpf_map *map) +{ + /* The memory dynamically allocated in netdev dev_ops is not counted */ + return sizeof(struct bpf_offloaded_map); +} + int bpf_map_offload_lookup_elem(struct bpf_map *map, void *key, void *value) { struct bpf_offloaded_map *offmap = map_to_offmap(map); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 7c96e68859ec4..053409d951d27 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -105,6 +105,7 @@ const struct bpf_map_ops bpf_map_offload_ops = { .map_alloc = bpf_map_offload_map_alloc, .map_free = bpf_map_offload_map_free, .map_check_btf = map_check_no_btf, + .map_mem_usage = bpf_map_offload_map_mem_usage, }; static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) -- GitLab From 6b4a6ea2c62d34272d64161d43a19c02355576e2 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sun, 5 Mar 2023 12:46:15 +0000 Subject: [PATCH 0194/2078] bpf: enforce all maps having memory usage callback We have implemented memory usage callback for all maps, and we enforce any newly added map having a callback as well. We check this callback at map creation time. If it doesn't have the callback, we will return EINVAL. 
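The enforcement pattern is worth spelling out, since it moves the failure from every call site to a single check at creation time. The following is a standalone C sketch with illustrative names; it is not the actual syscall.c code.

#include <errno.h>
#include <stddef.h>
#include <stdio.h>

struct toy_map_ops {
        void *(*alloc)(void);
        unsigned long long (*mem_usage)(const void *map);  /* now mandatory */
};

static void *toy_alloc(void) { static int dummy; return &dummy; }
static unsigned long long toy_usage(const void *map) { (void)map; return 4096; }

/* Reject an ops table missing a mandatory callback up front, instead of
 * checking (or crashing) later whenever the map's usage is queried.
 */
static int toy_create_map(const struct toy_map_ops *ops, void **out)
{
        if (!ops->alloc || !ops->mem_usage)
                return -EINVAL;
        *out = ops->alloc();
        return 0;
}

int main(void)
{
        const struct toy_map_ops good = { .alloc = toy_alloc, .mem_usage = toy_usage };
        const struct toy_map_ops bad  = { .alloc = toy_alloc };  /* callback missing */
        void *m;

        printf("complete ops:  %d\n", toy_create_map(&good, &m)); /* 0 */
        printf("missing usage: %d\n", toy_create_map(&bad, &m));  /* -22 (-EINVAL) */
        return 0;
}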
Signed-off-by: Yafang Shao Link: https://lore.kernel.org/r/20230305124615.12358-19-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/syscall.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 053409d951d27..f406dfa137924 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -129,6 +129,8 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) } if (attr->map_ifindex) ops = &bpf_map_offload_ops; + if (!ops->map_mem_usage) + return ERR_PTR(-EINVAL); map = ops->map_alloc(attr); if (IS_ERR(map)) return map; @@ -775,13 +777,7 @@ static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f) /* Show the memory usage of a bpf map */ static u64 bpf_map_memory_usage(const struct bpf_map *map) { - unsigned long size; - - if (map->ops->map_mem_usage) - return map->ops->map_mem_usage(map); - - size = round_up(map->key_size + bpf_map_value_size(map), 8); - return round_up(map->max_entries * size, PAGE_SIZE); + return map->ops->map_mem_usage(map); } static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) -- GitLab From 11a2638d120b9d998916efb6fc55c6422e469ffa Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Sun, 5 Mar 2023 14:16:19 +0200 Subject: [PATCH 0195/2078] wifi: radiotap: separate vendor TLV into header/content To be able to use a general function later for any kind of TLV, separate the vendor TLV header/content in the structs. Signed-off-by: Mordechay Goodstein Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230305124407.8ac5195bb3e6.I19ad99c1ad3108453aede64bddf6ef1a7c4a0b74@changeid [separate from the original combined patch] Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 16 +++++++-------- drivers/net/wireless/mac80211_hwsim.c | 14 ++++++------- include/net/ieee80211_radiotap.h | 20 +++++++++++++------ 3 files changed, 29 insertions(+), 21 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 2db4f68becff2..71a6555f90d94 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -223,18 +223,18 @@ static void iwl_mvm_add_rtap_sniffer_config(struct iwl_mvm *mvm, vendor_data_len); /* Intel OUI */ - radiotap->oui[0] = 0xf6; - radiotap->oui[1] = 0x54; - radiotap->oui[2] = 0x25; + radiotap->content.oui[0] = 0xf6; + radiotap->content.oui[1] = 0x54; + radiotap->content.oui[2] = 0x25; /* radiotap sniffer config sub-namespace */ - radiotap->oui_subtype = 1; - radiotap->vendor_type = 0; + radiotap->content.oui_subtype = 1; + radiotap->content.vendor_type = 0; /* clear reserved field */ - radiotap->reserved = 0; + radiotap->content.reserved = 0; /* fill the data now */ - memcpy(radiotap->data, &mvm->cur_aid, sizeof(mvm->cur_aid)); + memcpy(radiotap->content.data, &mvm->cur_aid, sizeof(mvm->cur_aid)); /* and clear the padding */ - memset(radiotap->data + vendor_data_len, 0, padding); + memset(radiotap->content.data + vendor_data_len, 0, padding); rx_status->flag |= RX_FLAG_RADIOTAP_TLV_AT_END; } diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 152617034d19e..f4bdc243ea0d1 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -1556,14 +1556,14 @@ static void mac80211_hwsim_add_vendor_rtap(struct sk_buff *skb) sizeof(vendor_data)); rtap->type = cpu_to_le16(IEEE80211_RADIOTAP_VENDOR_NAMESPACE); - 
rtap->oui[0] = HWSIM_RADIOTAP_OUI[0]; - rtap->oui[1] = HWSIM_RADIOTAP_OUI[1]; - rtap->oui[2] = HWSIM_RADIOTAP_OUI[2]; - rtap->oui_subtype = 127; + rtap->content.oui[0] = HWSIM_RADIOTAP_OUI[0]; + rtap->content.oui[1] = HWSIM_RADIOTAP_OUI[1]; + rtap->content.oui[2] = HWSIM_RADIOTAP_OUI[2]; + rtap->content.oui_subtype = 127; /* clear reserved field */ - rtap->reserved = 0; - rtap->vendor_type = 0; - memcpy(rtap->data, vendor_data, sizeof(vendor_data)); + rtap->content.reserved = 0; + rtap->content.vendor_type = 0; + memcpy(rtap->content.data, vendor_data, sizeof(vendor_data)); IEEE80211_SKB_RXCB(skb)->flag |= RX_FLAG_RADIOTAP_TLV_AT_END; #endif diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index 95436686d3fe6..f980a72f2ce6e 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -370,18 +370,14 @@ struct ieee80211_radiotap_tlv { } __packed; /** - * struct ieee80211_radiotap_vendor_tlv - vendor radiotap data information - * @type: should always be set to IEEE80211_RADIOTAP_VENDOR_NAMESPACE - * @len: length of data + * struct ieee80211_radiotap_vendor_content - radiotap vendor data content * @oui: radiotap vendor namespace OUI * @oui_subtype: radiotap vendor sub namespace * @vendor_type: radiotap vendor type * @reserved: should always be set to zero (to avoid leaking memory) * @data: the actual vendor namespace data */ -struct ieee80211_radiotap_vendor_tlv { - __le16 type; /* IEEE80211_RADIOTAP_VENDOR_NAMESPACE */ - __le16 len; +struct ieee80211_radiotap_vendor_content { u8 oui[3]; u8 oui_subtype; __le16 vendor_type; @@ -389,6 +385,18 @@ struct ieee80211_radiotap_vendor_tlv { u8 data[]; } __packed; +/** + * struct ieee80211_radiotap_vendor_tlv - vendor radiotap data information + * @type: should always be set to IEEE80211_RADIOTAP_VENDOR_NAMESPACE + * @len: length of data + * @content: vendor content see @ieee80211_radiotap_vendor_content + */ +struct ieee80211_radiotap_vendor_tlv { + __le16 type; /* IEEE80211_RADIOTAP_VENDOR_NAMESPACE */ + __le16 len; + struct ieee80211_radiotap_vendor_content content; +}; + /* ieee80211_radiotap_eht_usig - content of U-SIG tlv (type 33) * see www.radiotap.org/fields/U-SIG.html for details */ -- GitLab From 056805bcc6bc439de1c9fbb1854042fdf40c8638 Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Sun, 5 Mar 2023 14:16:19 +0200 Subject: [PATCH 0196/2078] wifi: iwlwifi: mvm: add an helper function radiotap TLVs Add a helper function setting type, length, zeroing out TLV data and including adding padding if necessary. 
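The helper described here reduces to one idea: write the fixed TLV header, then reserve a zeroed data area rounded up to a 4-byte boundary, so callers never deal with padding themselves. A standalone sketch of that layout, independent of the driver's skb API; the buffer handling and the TLV type value are illustrative assumptions, not the mvm code:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define ALIGN4(x) (((x) + 3U) & ~3U)

    struct tlv_hdr {
        uint16_t type;
        uint16_t len;
    } __attribute__((packed));

    /* Append a TLV header plus a zeroed, 4-byte aligned data area to buf
     * and return a pointer to the data area so the caller can fill it in.
     */
    static void *tlv_put(uint8_t *buf, size_t *used, uint16_t type, uint16_t len)
    {
        struct tlv_hdr hdr = { .type = type, .len = len };
        uint8_t *data = buf + *used + sizeof(hdr);

        memcpy(buf + *used, &hdr, sizeof(hdr));
        memset(data, 0, ALIGN4(len)); /* payload and padding start out zeroed */
        *used += sizeof(hdr) + ALIGN4(len);
        return data;
    }

    int main(void)
    {
        uint8_t buf[64];
        size_t used = 0;
        uint16_t aid = 0x1234; /* stand-in for the 2-byte sniffer AID payload */

        memcpy(tlv_put(buf, &used, 30 /* hypothetical type */, sizeof(aid)),
               &aid, sizeof(aid));
        printf("bytes consumed: %zu\n", used); /* 4 header + 4 padded data = 8 */
        return 0;
    }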
Signed-off-by: Mordechay Goodstein Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230305124407.8ac5195bb3e6.I19ad99c1ad3108453aede64bddf6ef1a7c4a0b74@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 43 +++++++++++-------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 71a6555f90d94..206b56ca8db85 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -205,36 +205,45 @@ static int iwl_mvm_create_skb(struct iwl_mvm *mvm, struct sk_buff *skb, return 0; } +/* put a TLV on the skb and return data pointer + * + * Also pad to 4 the len and zero out all data part + */ +static void * +iwl_mvm_radiotap_put_tlv(struct sk_buff *skb, u16 type, u16 len) +{ + struct ieee80211_radiotap_tlv *tlv; + + tlv = skb_put(skb, sizeof(*tlv)); + tlv->type = cpu_to_le16(type); + tlv->len = cpu_to_le16(len); + return skb_put_zero(skb, ALIGN(len, 4)); +} + static void iwl_mvm_add_rtap_sniffer_config(struct iwl_mvm *mvm, struct sk_buff *skb) { struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb); - struct ieee80211_radiotap_vendor_tlv *radiotap; + struct ieee80211_radiotap_vendor_content *radiotap; const u16 vendor_data_len = sizeof(mvm->cur_aid); - const u16 padding = ALIGN(vendor_data_len, 4) - vendor_data_len; if (!mvm->cur_aid) return; - radiotap = skb_put(skb, sizeof(*radiotap) + vendor_data_len + padding); - radiotap->type = cpu_to_le16(IEEE80211_RADIOTAP_VENDOR_NAMESPACE); - radiotap->len = cpu_to_le16(sizeof(*radiotap) - - sizeof(struct ieee80211_radiotap_tlv) + - vendor_data_len); + radiotap = iwl_mvm_radiotap_put_tlv(skb, + IEEE80211_RADIOTAP_VENDOR_NAMESPACE, + sizeof(*radiotap) + vendor_data_len); /* Intel OUI */ - radiotap->content.oui[0] = 0xf6; - radiotap->content.oui[1] = 0x54; - radiotap->content.oui[2] = 0x25; + radiotap->oui[0] = 0xf6; + radiotap->oui[1] = 0x54; + radiotap->oui[2] = 0x25; /* radiotap sniffer config sub-namespace */ - radiotap->content.oui_subtype = 1; - radiotap->content.vendor_type = 0; - /* clear reserved field */ - radiotap->content.reserved = 0; + radiotap->oui_subtype = 1; + radiotap->vendor_type = 0; + /* fill the data now */ - memcpy(radiotap->content.data, &mvm->cur_aid, sizeof(mvm->cur_aid)); - /* and clear the padding */ - memset(radiotap->content.data + vendor_data_len, 0, padding); + memcpy(radiotap->data, &mvm->cur_aid, sizeof(mvm->cur_aid)); rx_status->flag |= RX_FLAG_RADIOTAP_TLV_AT_END; } -- GitLab From 24f7f6e3ed3b91381307c649a32aff9a69b7cae6 Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Sun, 5 Mar 2023 14:16:20 +0200 Subject: [PATCH 0197/2078] wifi: iwlwifi: mvm: add EHT radiotap info based on rate_n_flags rate_n_flags is always present in the data so at least give all of the information we can extract from rate_n_flags Signed-off-by: Mordechay Goodstein Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230305124407.b1c7d49ad35e.Ie2412ac6f88700aa3767ff95ffb52a806b13b7ce@changeid Signed-off-by: Johannes Berg --- .../net/wireless/intel/iwlwifi/fw/api/rs.h | 24 +++- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 135 ++++++++++++++++++ 2 files changed, 152 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/rs.h b/drivers/net/wireless/intel/iwlwifi/fw/api/rs.h index ddacd5b45aeab..1d372a09ebb4a 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/rs.h +++ 
b/drivers/net/wireless/intel/iwlwifi/fw/api/rs.h @@ -449,11 +449,16 @@ enum { * 1 2xLTF+0.8us * 2 2xLTF+1.6us * 3 4xLTF+3.2us - * HE TRIG: + * HE-EHT TRIG: * 0 1xLTF+1.6us * 1 2xLTF+1.6us * 2 4xLTF+3.2us * 3 (does not occur) + * EHT MU: + * 0 2xLTF+0.8us + * 1 2xLTF+1.6us + * 2 4xLTF+0.8us + * 3 4xLTF+3.2us */ #define RATE_MCS_HE_GI_LTF_POS 20 #define RATE_MCS_HE_GI_LTF_MSK_V1 (3 << RATE_MCS_HE_GI_LTF_POS) @@ -546,12 +551,17 @@ enum { /* * Bits 13-11: (0) 20MHz, (1) 40MHz, (2) 80MHz, (3) 160MHz, (4) 320MHz */ -#define RATE_MCS_CHAN_WIDTH_MSK (0x7 << RATE_MCS_CHAN_WIDTH_POS) -#define RATE_MCS_CHAN_WIDTH_20 (0 << RATE_MCS_CHAN_WIDTH_POS) -#define RATE_MCS_CHAN_WIDTH_40 (1 << RATE_MCS_CHAN_WIDTH_POS) -#define RATE_MCS_CHAN_WIDTH_80 (2 << RATE_MCS_CHAN_WIDTH_POS) -#define RATE_MCS_CHAN_WIDTH_160 (3 << RATE_MCS_CHAN_WIDTH_POS) -#define RATE_MCS_CHAN_WIDTH_320 (4 << RATE_MCS_CHAN_WIDTH_POS) +#define RATE_MCS_CHAN_WIDTH_MSK (0x7 << RATE_MCS_CHAN_WIDTH_POS) +#define RATE_MCS_CHAN_WIDTH_20_VAL 0 +#define RATE_MCS_CHAN_WIDTH_20 (RATE_MCS_CHAN_WIDTH_20_VAL << RATE_MCS_CHAN_WIDTH_POS) +#define RATE_MCS_CHAN_WIDTH_40_VAL 1 +#define RATE_MCS_CHAN_WIDTH_40 (RATE_MCS_CHAN_WIDTH_40_VAL << RATE_MCS_CHAN_WIDTH_POS) +#define RATE_MCS_CHAN_WIDTH_80_VAL 2 +#define RATE_MCS_CHAN_WIDTH_80 (RATE_MCS_CHAN_WIDTH_80_VAL << RATE_MCS_CHAN_WIDTH_POS) +#define RATE_MCS_CHAN_WIDTH_160_VAL 3 +#define RATE_MCS_CHAN_WIDTH_160 (RATE_MCS_CHAN_WIDTH_160_VAL << RATE_MCS_CHAN_WIDTH_POS) +#define RATE_MCS_CHAN_WIDTH_320_VAL 4 +#define RATE_MCS_CHAN_WIDTH_320 (RATE_MCS_CHAN_WIDTH_320_VAL << RATE_MCS_CHAN_WIDTH_POS) /* Bit 15-14: Antenna selection: * Bit 14: Ant A active diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 206b56ca8db85..aa72910f8ff2b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -1456,6 +1456,137 @@ static void iwl_mvm_decode_he_phy_data(struct iwl_mvm *mvm, } } +static void iwl_mvm_rx_eht(struct iwl_mvm *mvm, struct sk_buff *skb, + struct iwl_mvm_rx_phy_data *phy_data, + int queue) +{ + struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb); + + struct ieee80211_radiotap_eht *eht; + struct ieee80211_radiotap_eht_usig *usig; + + u32 rate_n_flags = phy_data->rate_n_flags; + u32 he_type = rate_n_flags & RATE_MCS_HE_TYPE_MSK; + /* EHT and HE have the same valus for LTF */ + u8 ltf = IEEE80211_RADIOTAP_HE_DATA5_LTF_SIZE_UNKNOWN; + u16 phy_info = phy_data->phy_info; + u32 bw; + + /* u32 for 1 user_info */ + eht = iwl_mvm_radiotap_put_tlv(skb, IEEE80211_RADIOTAP_EHT, + sizeof(*eht) + sizeof(u32)); + + usig = iwl_mvm_radiotap_put_tlv(skb, IEEE80211_RADIOTAP_EHT_USIG, + sizeof(*usig)); + rx_status->flag |= RX_FLAG_RADIOTAP_TLV_AT_END; + usig->common |= + cpu_to_le32(IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW_KNOWN); + + /* specific handling for 320MHz */ + bw = FIELD_GET(RATE_MCS_CHAN_WIDTH_MSK, rate_n_flags); + if (bw == RATE_MCS_CHAN_WIDTH_320_VAL) + bw += FIELD_GET(IWL_RX_PHY_DATA0_EHT_BW320_SLOT, + le32_to_cpu(phy_data->d0)); + + usig->common |= cpu_to_le32 + (FIELD_PREP(IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW, bw)); + + /* report the AMPDU-EOF bit on single frames */ + if (!queue && !(phy_info & IWL_RX_MPDU_PHY_AMPDU)) { + rx_status->flag |= RX_FLAG_AMPDU_DETAILS; + rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT_KNOWN; + if (phy_data->d0 & cpu_to_le32(IWL_RX_PHY_DATA0_EHT_DELIM_EOF)) + rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT; + } + + /* update aggregation data for monitor sake on default queue 
*/ + if (!queue && (phy_info & IWL_RX_MPDU_PHY_TSF_OVERLOAD) && + (phy_info & IWL_RX_MPDU_PHY_AMPDU)) { + bool toggle_bit = phy_info & IWL_RX_MPDU_PHY_AMPDU_TOGGLE; + + /* toggle is switched whenever new aggregation starts */ + if (toggle_bit != mvm->ampdu_toggle) { + rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT_KNOWN; + if (phy_data->d0 & cpu_to_le32(IWL_RX_PHY_DATA0_EHT_DELIM_EOF)) + rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT; + } + } + + /* TODO: fill usig info (phy_info & IWL_RX_MPDU_PHY_TSF_OVERLOAD) */ + +#define CHECK_TYPE(F) \ + BUILD_BUG_ON(IEEE80211_RADIOTAP_HE_DATA1_FORMAT_ ## F != \ + (RATE_MCS_HE_TYPE_ ## F >> RATE_MCS_HE_TYPE_POS)) + + CHECK_TYPE(SU); + CHECK_TYPE(EXT_SU); + CHECK_TYPE(MU); + CHECK_TYPE(TRIG); + + switch (FIELD_GET(RATE_MCS_HE_GI_LTF_MSK, rate_n_flags)) { + case 0: + if (he_type == RATE_MCS_HE_TYPE_TRIG) { + rx_status->eht.gi = NL80211_RATE_INFO_EHT_GI_1_6; + ltf = IEEE80211_RADIOTAP_HE_DATA5_LTF_SIZE_1X; + } else { + rx_status->eht.gi = NL80211_RATE_INFO_EHT_GI_0_8; + ltf = IEEE80211_RADIOTAP_HE_DATA5_LTF_SIZE_2X; + } + break; + case 1: + rx_status->eht.gi = NL80211_RATE_INFO_EHT_GI_1_6; + ltf = IEEE80211_RADIOTAP_HE_DATA5_LTF_SIZE_2X; + break; + case 2: + ltf = IEEE80211_RADIOTAP_HE_DATA5_LTF_SIZE_4X; + if (he_type == RATE_MCS_HE_TYPE_TRIG) + rx_status->eht.gi = NL80211_RATE_INFO_EHT_GI_3_2; + else + rx_status->eht.gi = NL80211_RATE_INFO_EHT_GI_0_8; + break; + case 3: + if (he_type != RATE_MCS_HE_TYPE_TRIG) { + ltf = IEEE80211_RADIOTAP_HE_DATA5_LTF_SIZE_4X; + rx_status->eht.gi = NL80211_RATE_INFO_EHT_GI_3_2; + } + break; + default: + /* nothing here */ + break; + } + + if (ltf != IEEE80211_RADIOTAP_HE_DATA5_LTF_SIZE_UNKNOWN) { + eht->known |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_KNOWN_GI); + eht->data[0] |= cpu_to_le32 + (FIELD_PREP(IEEE80211_RADIOTAP_EHT_DATA0_LTF, + ltf) | + FIELD_PREP(IEEE80211_RADIOTAP_EHT_DATA0_GI, + rx_status->eht.gi)); + } + + eht->user_info[0] |= cpu_to_le32 + (IEEE80211_RADIOTAP_EHT_USER_INFO_MCS_KNOWN | + IEEE80211_RADIOTAP_EHT_USER_INFO_CODING_KNOWN | + IEEE80211_RADIOTAP_EHT_USER_INFO_NSS_KNOWN_O | + IEEE80211_RADIOTAP_EHT_USER_INFO_BEAMFORMING_KNOWN_O | + IEEE80211_RADIOTAP_EHT_USER_INFO_DATA_FOR_USER); + + if (rate_n_flags & RATE_MCS_BF_MSK) + eht->user_info[0] |= + cpu_to_le32(IEEE80211_RADIOTAP_EHT_USER_INFO_BEAMFORMING_O); + + if (rate_n_flags & RATE_MCS_LDPC_MSK) + eht->user_info[0] |= + cpu_to_le32(IEEE80211_RADIOTAP_EHT_USER_INFO_CODING); + + eht->user_info[0] |= cpu_to_le32 + (FIELD_PREP(IEEE80211_RADIOTAP_EHT_USER_INFO_MCS, + FIELD_GET(RATE_VHT_MCS_RATE_CODE_MSK, + rate_n_flags)) | + FIELD_PREP(IEEE80211_RADIOTAP_EHT_USER_INFO_NSS_O, + FIELD_GET(RATE_MCS_NSS_MSK, rate_n_flags))); +} + static void iwl_mvm_rx_he(struct iwl_mvm *mvm, struct sk_buff *skb, struct iwl_mvm_rx_phy_data *phy_data, int queue) @@ -1703,6 +1834,10 @@ static void iwl_mvm_rx_fill_status(struct iwl_mvm *mvm, iwl_mvm_get_signal_strength(mvm, rx_status, rate_n_flags, phy_data->energy_a, phy_data->energy_b); + /* using TLV format and must be after all fixed len fields */ + if (format == RATE_MCS_EHT_MSK) + iwl_mvm_rx_eht(mvm, skb, phy_data, queue); + if (unlikely(mvm->monitor_on)) iwl_mvm_add_rtap_sniffer_config(mvm, skb); -- GitLab From 4ec825854c3c1e7a2cb3fedd6d1acbba0a3e2d74 Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Sun, 5 Mar 2023 14:16:21 +0200 Subject: [PATCH 0198/2078] wifi: iwlwifi: mvm: add all EHT based on data0 info from HW Update all radiotap EHT TLVs that we can extract from data0 in HW. 
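The LE32_DEC_ENC() macro this patch introduces is just "extract a field under one mask, re-insert it under another": the firmware packs a value into some bits of data0, and radiotap wants that same value at a different bit position in its own word. A standalone sketch of the transform with made-up field layouts; the masks below are illustrative, not the firmware's, and byte-order conversion is left out:

    #include <stdint.h>
    #include <stdio.h>

    static unsigned int mask_shift(uint32_t mask)
    {
        return (unsigned int)__builtin_ctz(mask); /* mask must be non-zero */
    }

    static uint32_t get_bits(uint32_t value, uint32_t mask)
    {
        return (value & mask) >> mask_shift(mask);
    }

    static uint32_t encode_bits(uint32_t field, uint32_t mask)
    {
        return (field << mask_shift(mask)) & mask;
    }

    /* decode a field from one word and re-encode it for another, the same
     * shape as LE32_DEC_ENC(value, dec_bits, enc_bits)
     */
    static uint32_t dec_enc(uint32_t value, uint32_t dec_mask, uint32_t enc_mask)
    {
        return encode_bits(get_bits(value, dec_mask), enc_mask);
    }

    int main(void)
    {
        /* hypothetical layout: firmware reports "uplink" in bit 6 of data0,
         * the radiotap U-SIG common word wants it in bit 0
         */
        uint32_t fw_data0 = 1U << 6;

        printf("usig common: 0x%x\n", dec_enc(fw_data0, 1U << 6, 1U << 0));
        return 0;
    }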
Signed-off-by: Mordechay Goodstein Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230305124407.730f219e02ee.Ife3dd85c65758694d7602e8bc8660887d77faacf@changeid Signed-off-by: Johannes Berg --- .../net/wireless/intel/iwlwifi/fw/api/rx.h | 3 +- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 73 ++++++++++++++++++- 2 files changed, 73 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h b/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h index 1c4e849320585..544d22472a6ff 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h @@ -367,7 +367,8 @@ enum iwl_rx_phy_eht_data1 { /* number of EHT-LTF symbols 0 - 1 EHT-LTF, 1 - 2 EHT-LTFs, 2 - 4 EHT-LTFs, * 3 - 6 EHT-LTFs, 4 - 8 EHT-LTFs */ IWL_RX_PHY_DATA1_EHT_SIG_LTF_NUM = 0x000000e0, - IWL_RX_PHY_DATA1_EHT_RU_ALLOC = 0x0000ff00, + IWL_RX_PHY_DATA1_EHT_B0 = 0x00000100, + IWL_RX_PHY_DATA1_EHT_RU_B1_B7_ALLOC = 0x0000fe00, }; /* goes into Metadata DW 7 */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index aa72910f8ff2b..174bed9b50742 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2012-2014, 2018-2022 Intel Corporation + * Copyright (C) 2012-2014, 2018-2023 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH */ @@ -1456,6 +1456,74 @@ static void iwl_mvm_decode_he_phy_data(struct iwl_mvm *mvm, } } +#define LE32_DEC_ENC(value, dec_bits, enc_bits) \ + le32_encode_bits(le32_get_bits(value, dec_bits), enc_bits) + +static void iwl_mvm_decode_eht_phy_data(struct iwl_mvm *mvm, + struct iwl_mvm_rx_phy_data *phy_data, + struct ieee80211_rx_status *rx_status, + struct ieee80211_radiotap_eht *eht, + struct ieee80211_radiotap_eht_usig *usig) + +{ + __le32 data0 = phy_data->d0; + __le32 data1 = phy_data->d1; + u8 info_type = phy_data->info_type; + + /* Not in EHT range */ + if (info_type < IWL_RX_PHY_INFO_TYPE_EHT_MU || + info_type > IWL_RX_PHY_INFO_TYPE_EHT_TB_EXT) + return; + + usig->common |= cpu_to_le32 + (IEEE80211_RADIOTAP_EHT_USIG_COMMON_UL_DL_KNOWN | + IEEE80211_RADIOTAP_EHT_USIG_COMMON_BSS_COLOR_KNOWN); + usig->common |= LE32_DEC_ENC(data0, + IWL_RX_PHY_DATA0_EHT_UPLINK, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_UL_DL); + usig->common |= LE32_DEC_ENC(data0, + IWL_RX_PHY_DATA0_EHT_BSS_COLOR_MASK, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_BSS_COLOR_KNOWN); + + eht->known |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_KNOWN_SPATIAL_REUSE); + eht->data[0] |= LE32_DEC_ENC(data0, + IWL_RX_PHY_DATA0_ETH_SPATIAL_REUSE_MASK, + IEEE80211_RADIOTAP_EHT_DATA0_SPATIAL_REUSE); + + /* All RU allocating size/index is in TB format */ + eht->known |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_KNOWN_RU_ALLOC_TB_FMT); + eht->data[8] |= LE32_DEC_ENC(data0, IWL_RX_PHY_DATA0_EHT_PS160, + IEEE80211_RADIOTAP_EHT_DATA8_RU_ALLOC_TB_FMT_PS_160); + eht->data[8] |= LE32_DEC_ENC(data1, IWL_RX_PHY_DATA1_EHT_B0, + IEEE80211_RADIOTAP_EHT_DATA8_RU_ALLOC_TB_FMT_B0); + eht->data[8] |= LE32_DEC_ENC(data1, IWL_RX_PHY_DATA1_EHT_RU_B1_B7_ALLOC, + IEEE80211_RADIOTAP_EHT_DATA8_RU_ALLOC_TB_FMT_B7_B1); + + usig->common |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_USIG_COMMON_TXOP_KNOWN); + usig->common |= LE32_DEC_ENC(data0, IWL_RX_PHY_DATA0_EHT_TXOP_DUR_MASK, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_TXOP); + eht->known |= 
cpu_to_le32(IEEE80211_RADIOTAP_EHT_KNOWN_LDPC_EXTRA_SYM_OM); + eht->data[0] |= LE32_DEC_ENC(data0, IWL_RX_PHY_DATA0_EHT_LDPC_EXT_SYM, + IEEE80211_RADIOTAP_EHT_DATA0_LDPC_EXTRA_SYM_OM); + + eht->known |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_KNOWN_PRE_PADD_FACOR_OM); + eht->data[0] |= LE32_DEC_ENC(data0, IWL_RX_PHY_DATA0_EHT_PRE_FEC_PAD_MASK, + IEEE80211_RADIOTAP_EHT_DATA0_PRE_PADD_FACOR_OM); + + eht->known |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_KNOWN_PE_DISAMBIGUITY_OM); + eht->data[0] |= LE32_DEC_ENC(data0, IWL_RX_PHY_DATA0_EHT_PE_DISAMBIG, + IEEE80211_RADIOTAP_EHT_DATA0_PE_DISAMBIGUITY_OM); + + /* TODO: what about IWL_RX_PHY_DATA0_EHT_BW320_SLOT */ + + if (!le32_get_bits(data0, IWL_RX_PHY_DATA0_EHT_SIGA_CRC_OK)) + usig->common |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_USIG_COMMON_BAD_USIG_CRC); + + usig->common |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_USIG_COMMON_PHY_VER_KNOWN); + usig->common |= LE32_DEC_ENC(data0, IWL_RX_PHY_DATA0_EHT_PHY_VER, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_PHY_VER); +} + static void iwl_mvm_rx_eht(struct iwl_mvm *mvm, struct sk_buff *skb, struct iwl_mvm_rx_phy_data *phy_data, int queue) @@ -1512,7 +1580,8 @@ static void iwl_mvm_rx_eht(struct iwl_mvm *mvm, struct sk_buff *skb, } } - /* TODO: fill usig info (phy_info & IWL_RX_MPDU_PHY_TSF_OVERLOAD) */ + if (phy_info & IWL_RX_MPDU_PHY_TSF_OVERLOAD) + iwl_mvm_decode_eht_phy_data(mvm, phy_data, rx_status, eht, usig); #define CHECK_TYPE(F) \ BUILD_BUG_ON(IEEE80211_RADIOTAP_HE_DATA1_FORMAT_ ## F != \ -- GitLab From 3ecf34118dc80a0508dc82e284617513cbb270e8 Mon Sep 17 00:00:00 2001 From: Alon Giladi Date: Sun, 5 Mar 2023 14:16:22 +0200 Subject: [PATCH 0199/2078] wifi: iwlwifi: mvm: allow Microsoft to use TAS Add Microsoft to the list of OEMs which allowed to use TAS. Signed-off-by: Alon Giladi Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230305124407.662967fec1cc.Icb30cddc049cb5402fd5ab2ce7f95033e478b1b9@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 0c6b49fcb00d4..45981e22b2dbd 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -1084,6 +1084,11 @@ static const struct dmi_system_id dmi_tas_approved_list[] = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), }, }, + { .ident = "MSFT", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), + }, + }, /* keep last */ {} -- GitLab From c0da321b601a92453f928c819dbd65c5712480d3 Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Sun, 5 Mar 2023 14:16:23 +0200 Subject: [PATCH 0200/2078] wifi: iwlwifi: mvm: rename define to generic name The type RX_NO_DATA_INFO_TYPE_HE_TB_UNMATCHED is applied to all TB frames including EHT mode, so rename accordingly. 
Signed-off-by: Mordechay Goodstein Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230305124407.e4f51f347e48.I2d6ecb6eadc95666d2ef9794662ee779488ceac1@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/fw/api/rx.h | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h b/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h index 544d22472a6ff..0f190266fffd4 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h @@ -726,7 +726,7 @@ struct iwl_rx_mpdu_desc { #define RX_NO_DATA_INFO_TYPE_RX_ERR 1 #define RX_NO_DATA_INFO_TYPE_NDP 2 #define RX_NO_DATA_INFO_TYPE_MU_UNMATCHED 3 -#define RX_NO_DATA_INFO_TYPE_HE_TB_UNMATCHED 4 +#define RX_NO_DATA_INFO_TYPE_TB_UNMATCHED 4 #define RX_NO_DATA_INFO_ERR_POS 8 #define RX_NO_DATA_INFO_ERR_MSK (0xff << RX_NO_DATA_INFO_ERR_POS) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 174bed9b50742..1fb77cda568a0 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -2341,7 +2341,7 @@ void iwl_mvm_rx_monitor_no_data(struct iwl_mvm *mvm, struct napi_struct *napi, IEEE80211_RADIOTAP_ZERO_LEN_PSDU_SOUNDING; break; case RX_NO_DATA_INFO_TYPE_MU_UNMATCHED: - case RX_NO_DATA_INFO_TYPE_HE_TB_UNMATCHED: + case RX_NO_DATA_INFO_TYPE_TB_UNMATCHED: rx_status->zero_length_psdu_type = IEEE80211_RADIOTAP_ZERO_LEN_PSDU_NOT_CAPTURED; break; -- GitLab From e8c0a6fd08d7dc6c178e77f154794cdd55575780 Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Sun, 5 Mar 2023 14:16:24 +0200 Subject: [PATCH 0201/2078] wifi: iwlwifi: mvm: decode USIG_B1_B7 RU to nl80211 RU width This is based on 802.11be D1.5 table 9-53a Signed-off-by: Mordechay Goodstein Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230305124407.0b720d6d6a48.I0034dd108696223494799d3ffe4f09685800b831@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 71 +++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 1fb77cda568a0..c069e81bf3f1b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -1459,6 +1459,75 @@ static void iwl_mvm_decode_he_phy_data(struct iwl_mvm *mvm, #define LE32_DEC_ENC(value, dec_bits, enc_bits) \ le32_encode_bits(le32_get_bits(value, dec_bits), enc_bits) +static void iwl_mvm_decode_eht_ru(struct iwl_mvm *mvm, + struct ieee80211_rx_status *rx_status, + struct ieee80211_radiotap_eht *eht) +{ + u32 ru = le32_get_bits(eht->data[8], + IEEE80211_RADIOTAP_EHT_DATA8_RU_ALLOC_TB_FMT_B7_B1); + enum nl80211_eht_ru_alloc nl_ru; + + /* Using D1.5 Table 9-53a - Encoding of PS160 and RU Allocation subfields + * in an EHT variant User Info field + */ + + switch (ru) { + case 0 ... 36: + nl_ru = NL80211_RATE_INFO_EHT_RU_ALLOC_26; + break; + case 37 ... 52: + nl_ru = NL80211_RATE_INFO_EHT_RU_ALLOC_52; + break; + case 53 ... 60: + nl_ru = NL80211_RATE_INFO_EHT_RU_ALLOC_106; + break; + case 61 ... 64: + nl_ru = NL80211_RATE_INFO_EHT_RU_ALLOC_242; + break; + case 65 ... 
66: + nl_ru = NL80211_RATE_INFO_EHT_RU_ALLOC_484; + break; + case 67: + nl_ru = NL80211_RATE_INFO_EHT_RU_ALLOC_996; + break; + case 68: + nl_ru = NL80211_RATE_INFO_EHT_RU_ALLOC_2x996; + break; + case 69: + nl_ru = NL80211_RATE_INFO_EHT_RU_ALLOC_4x996; + break; + case 70 ... 81: + nl_ru = NL80211_RATE_INFO_EHT_RU_ALLOC_52P26; + break; + case 82 ... 89: + nl_ru = NL80211_RATE_INFO_EHT_RU_ALLOC_106P26; + break; + case 90 ... 93: + nl_ru = NL80211_RATE_INFO_EHT_RU_ALLOC_484P242; + break; + case 94 ... 95: + nl_ru = NL80211_RATE_INFO_EHT_RU_ALLOC_996P484; + break; + case 96 ... 99: + nl_ru = NL80211_RATE_INFO_EHT_RU_ALLOC_996P484P242; + break; + case 100 ... 103: + nl_ru = NL80211_RATE_INFO_EHT_RU_ALLOC_2x996P484; + break; + case 104: + nl_ru = NL80211_RATE_INFO_EHT_RU_ALLOC_3x996; + break; + case 105 ... 106: + nl_ru = NL80211_RATE_INFO_EHT_RU_ALLOC_3x996P484; + break; + default: + return; + } + + rx_status->bw = RATE_INFO_BW_EHT_RU; + rx_status->eht.ru = nl_ru; +} + static void iwl_mvm_decode_eht_phy_data(struct iwl_mvm *mvm, struct iwl_mvm_rx_phy_data *phy_data, struct ieee80211_rx_status *rx_status, @@ -1499,6 +1568,8 @@ static void iwl_mvm_decode_eht_phy_data(struct iwl_mvm *mvm, eht->data[8] |= LE32_DEC_ENC(data1, IWL_RX_PHY_DATA1_EHT_RU_B1_B7_ALLOC, IEEE80211_RADIOTAP_EHT_DATA8_RU_ALLOC_TB_FMT_B7_B1); + iwl_mvm_decode_eht_ru(mvm, rx_status, eht); + usig->common |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_USIG_COMMON_TXOP_KNOWN); usig->common |= LE32_DEC_ENC(data0, IWL_RX_PHY_DATA0_EHT_TXOP_DUR_MASK, IEEE80211_RADIOTAP_EHT_USIG_COMMON_TXOP); -- GitLab From feb4a0e215fa98f72c37b51344d3c013bb43e013 Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Sun, 5 Mar 2023 14:16:25 +0200 Subject: [PATCH 0202/2078] wifi: iwlwifi: mvm: parse FW frame metadata for EHT sniffer mode In EHT sniffer mode DW4 is all used for sniffer data (unlike we have in HE mode), so move the full DW4 into a union, and we extract the new data5 used for parsing USIG info and set all to radiotap TLVs with the extracted data. Also parse OFDM_RX_VECTOR_USIG_A1_OUT and OFDM_RX_VECTOR_USIG_A2_OUT for rx_no_data notification. 
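The DW4 rework in this patch only changes how the same four descriptor bytes are interpreted: either the legacy raw_csum/l3l4_flags pair, or one 32-bit word of EHT sniffer data. A minimal sketch of that overlay with the rest of the descriptor stripped away; field names follow the patch, but the struct is otherwise simplified and the printed value is what a little-endian host would show:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* simplified view of rx descriptor DW4 only */
    union rx_mpdu_dw4 {
        struct {
            uint16_t raw_csum; /* only meaningful when rpa_en == 1 */
            union {
                uint16_t l3l4_flags;
                uint16_t phy_data4;
            };
        };
        uint32_t phy_eht_data4; /* EHT sniffer data takes the whole DW */
    };

    int main(void)
    {
        union rx_mpdu_dw4 dw4 = { .phy_eht_data4 = 0x11223344 };

        static_assert(sizeof(union rx_mpdu_dw4) == 4, "DW4 must stay 32 bits");
        /* on a little-endian host the low half aliases raw_csum: 0x3344 */
        printf("raw_csum view: 0x%04x\n", dw4.raw_csum);
        return 0;
    }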
Signed-off-by: Mordechay Goodstein Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230305124407.557d3870753b.I4e9fa4d21900a187753529d46956ba2a7ee75fda@changeid Signed-off-by: Johannes Berg --- .../net/wireless/intel/iwlwifi/fw/api/rx.h | 66 +++++-- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 170 +++++++++++++++++- 2 files changed, 213 insertions(+), 23 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h b/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h index 0f190266fffd4..97e946e70279a 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h @@ -674,22 +674,31 @@ struct iwl_rx_mpdu_desc { * @mac_phy_idx: MAC/PHY index */ u8 mac_phy_idx; - /* DW4 - carries csum data only when rpa_en == 1 */ - /** - * @raw_csum: raw checksum (alledgedly unreliable) - */ - __le16 raw_csum; - + /* DW4 */ union { + struct { + /* carries csum data only when rpa_en == 1 */ + /** + * @raw_csum: raw checksum (alledgedly unreliable) + */ + __le16 raw_csum; + + union { + /** + * @l3l4_flags: &enum iwl_rx_l3l4_flags + */ + __le16 l3l4_flags; + + /** + * @phy_data4: depends on info type, see phy_data1 + */ + __le16 phy_data4; + }; + }; /** - * @l3l4_flags: &enum iwl_rx_l3l4_flags - */ - __le16 l3l4_flags; - - /** - * @phy_data4: depends on info type, see phy_data1 + * @phy_eht_data4: depends on info type, see phy_data1 */ - __le16 phy_data4; + __le32 phy_eht_data4; }; /* DW5 */ /** @@ -744,6 +753,35 @@ struct iwl_rx_mpdu_desc { #define RX_NO_DATA_RX_VEC0_VHT_NSTS_MSK 0x38000000 #define RX_NO_DATA_RX_VEC2_EHT_NSTS_MSK 0x00f00000 +/* content of OFDM_RX_VECTOR_USIG_A1_OUT */ +enum iwl_rx_usig_a1 { + IWL_RX_USIG_A1_ENHANCED_WIFI_VER_ID = 0x00000007, + IWL_RX_USIG_A1_BANDWIDTH = 0x00000038, + IWL_RX_USIG_A1_UL_FLAG = 0x00000040, + IWL_RX_USIG_A1_BSS_COLOR = 0x00001f80, + IWL_RX_USIG_A1_TXOP_DURATION = 0x000fe000, + IWL_RX_USIG_A1_DISREGARD = 0x01f00000, + IWL_RX_USIG_A1_VALIDATE = 0x02000000, + IWL_RX_USIG_A1_EHT_BW320_SLOT = 0x04000000, + IWL_RX_USIG_A1_EHT_TYPE = 0x18000000, + IWL_RX_USIG_A1_RDY = 0x80000000, +}; + +/* content of OFDM_RX_VECTOR_USIG_A2_EHT_OUT */ +enum iwl_rx_usig_a2_eht { + IWL_RX_USIG_A2_EHT_PPDU_TYPE = 0x00000003, + IWL_RX_USIG_A2_EHT_USIG2_VALIDATE_B2 = 0x00000004, + IWL_RX_USIG_A2_EHT_PUNC_CHANNEL = 0x000000f8, + IWL_RX_USIG_A2_EHT_USIG2_VALIDATE_B8 = 0x00000100, + IWL_RX_USIG_A2_EHT_SIG_MCS = 0x00000600, + IWL_RX_USIG_A2_EHT_SIG_SYM_NUM = 0x0000f800, + IWL_RX_USIG_A2_EHT_TRIG_SPATIAL_REUSE_1 = 0x000f0000, + IWL_RX_USIG_A2_EHT_TRIG_SPATIAL_REUSE_2 = 0x00f00000, + IWL_RX_USIG_A2_EHT_TRIG_USIG2_DISREGARD = 0x1f000000, + IWL_RX_USIG_A2_EHT_CRC_OK = 0x40000000, + IWL_RX_USIG_A2_EHT_RDY = 0x80000000, +}; + /** * struct iwl_rx_no_data - RX no data descriptor * @info: 7:0 frame type, 15:8 RX error type @@ -781,7 +819,7 @@ struct iwl_rx_no_data { * @rx_vec: DW-12:9 raw RX vectors from DSP according to modulation type. 
* for VHT: OFDM_RX_VECTOR_SIGA1_OUT, OFDM_RX_VECTOR_SIGA2_OUT * for HE: OFDM_RX_VECTOR_HE_SIGA1_OUT, OFDM_RX_VECTOR_HE_SIGA2_OUT - * for EHT: OFDM_RX_VECTOR_USIG_A1_OUT, OFDM_RX_VECTOR_USIG_A2_OUT, + * for EHT: OFDM_RX_VECTOR_USIG_A1_OUT, OFDM_RX_VECTOR_USIG_A2_EHT_OUT, * OFDM_RX_VECTOR_EHT_OUT, OFDM_RX_VECTOR_EHT_USER_FIELD_OUT */ struct iwl_rx_no_data_ver_3 { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index c069e81bf3f1b..1334c96098a57 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -1177,8 +1177,10 @@ static void iwl_mvm_flip_address(u8 *addr) struct iwl_mvm_rx_phy_data { enum iwl_rx_phy_info_type info_type; - __le32 d0, d1, d2, d3; + __le32 d0, d1, d2, d3, eht_d4, d5; __le16 d4; + bool with_data; + __le32 rx_vec[4]; u32 rate_n_flags; u32 gp2_on_air_rise; @@ -1459,6 +1461,119 @@ static void iwl_mvm_decode_he_phy_data(struct iwl_mvm *mvm, #define LE32_DEC_ENC(value, dec_bits, enc_bits) \ le32_encode_bits(le32_get_bits(value, dec_bits), enc_bits) +#define IWL_MVM_ENC_USIG_VALUE_MASK(usig, in_value, dec_bits, enc_bits) do { \ + typeof(enc_bits) _enc_bits = enc_bits; \ + typeof(usig) _usig = usig; \ + (_usig)->mask |= cpu_to_le32(_enc_bits); \ + (_usig)->value |= LE32_DEC_ENC(in_value, dec_bits, _enc_bits); \ +} while (0) + +static void iwl_mvm_decode_eht_ext_mu(struct iwl_mvm *mvm, + struct iwl_mvm_rx_phy_data *phy_data, + struct ieee80211_rx_status *rx_status, + struct ieee80211_radiotap_eht *eht, + struct ieee80211_radiotap_eht_usig *usig) +{ + __le32 data1 = phy_data->d1; + + if (phy_data->with_data) { + __le32 data4 = phy_data->eht_d4; + __le32 data5 = phy_data->d5; + + IWL_MVM_ENC_USIG_VALUE_MASK(usig, data5, + IWL_RX_PHY_DATA5_EHT_TYPE_AND_COMP, + IEEE80211_RADIOTAP_EHT_USIG2_MU_B0_B1_PPDU_TYPE); + IWL_MVM_ENC_USIG_VALUE_MASK(usig, data5, + IWL_RX_PHY_DATA5_EHT_MU_PUNC_CH_CODE, + IEEE80211_RADIOTAP_EHT_USIG2_MU_B3_B7_PUNCTURED_INFO); + IWL_MVM_ENC_USIG_VALUE_MASK(usig, data4, + IWL_RX_PHY_DATA4_EHT_MU_EXT_SIGB_MCS, + IEEE80211_RADIOTAP_EHT_USIG2_MU_B9_B10_SIG_MCS); + IWL_MVM_ENC_USIG_VALUE_MASK + (usig, data1, IWL_RX_PHY_DATA1_EHT_MU_NUM_SIG_SYM_USIGA2, + IEEE80211_RADIOTAP_EHT_USIG2_MU_B11_B15_EHT_SIG_SYMBOLS); + + } else { + __le32 usig_a1 = phy_data->rx_vec[0]; + __le32 usig_a2 = phy_data->rx_vec[1]; + + IWL_MVM_ENC_USIG_VALUE_MASK(usig, usig_a1, + IWL_RX_USIG_A1_DISREGARD, + IEEE80211_RADIOTAP_EHT_USIG1_MU_B20_B24_DISREGARD); + IWL_MVM_ENC_USIG_VALUE_MASK(usig, usig_a1, + IWL_RX_USIG_A1_VALIDATE, + IEEE80211_RADIOTAP_EHT_USIG1_MU_B25_VALIDATE); + IWL_MVM_ENC_USIG_VALUE_MASK(usig, usig_a2, + IWL_RX_USIG_A2_EHT_PPDU_TYPE, + IEEE80211_RADIOTAP_EHT_USIG2_MU_B0_B1_PPDU_TYPE); + IWL_MVM_ENC_USIG_VALUE_MASK(usig, usig_a2, + IWL_RX_USIG_A2_EHT_USIG2_VALIDATE_B2, + IEEE80211_RADIOTAP_EHT_USIG2_MU_B2_VALIDATE); + IWL_MVM_ENC_USIG_VALUE_MASK(usig, usig_a2, + IWL_RX_USIG_A2_EHT_PUNC_CHANNEL, + IEEE80211_RADIOTAP_EHT_USIG2_MU_B3_B7_PUNCTURED_INFO); + IWL_MVM_ENC_USIG_VALUE_MASK(usig, usig_a2, + IWL_RX_USIG_A2_EHT_USIG2_VALIDATE_B8, + IEEE80211_RADIOTAP_EHT_USIG2_MU_B8_VALIDATE); + IWL_MVM_ENC_USIG_VALUE_MASK(usig, usig_a2, + IWL_RX_USIG_A2_EHT_SIG_MCS, + IEEE80211_RADIOTAP_EHT_USIG2_MU_B9_B10_SIG_MCS); + IWL_MVM_ENC_USIG_VALUE_MASK + (usig, usig_a2, IWL_RX_USIG_A2_EHT_SIG_SYM_NUM, + IEEE80211_RADIOTAP_EHT_USIG2_MU_B11_B15_EHT_SIG_SYMBOLS); + IWL_MVM_ENC_USIG_VALUE_MASK(usig, usig_a2, + IWL_RX_USIG_A2_EHT_CRC_OK, + IEEE80211_RADIOTAP_EHT_USIG2_MU_B16_B19_CRC); + } 
+} + +static void iwl_mvm_decode_eht_ext_tb(struct iwl_mvm *mvm, + struct iwl_mvm_rx_phy_data *phy_data, + struct ieee80211_rx_status *rx_status, + struct ieee80211_radiotap_eht *eht, + struct ieee80211_radiotap_eht_usig *usig) +{ + if (phy_data->with_data) { + __le32 data5 = phy_data->d5; + + IWL_MVM_ENC_USIG_VALUE_MASK(usig, data5, + IWL_RX_PHY_DATA5_EHT_TYPE_AND_COMP, + IEEE80211_RADIOTAP_EHT_USIG2_TB_B0_B1_PPDU_TYPE); + IWL_MVM_ENC_USIG_VALUE_MASK(usig, data5, + IWL_RX_PHY_DATA5_EHT_TB_SPATIAL_REUSE1, + IEEE80211_RADIOTAP_EHT_USIG2_TB_B3_B6_SPATIAL_REUSE_1); + + IWL_MVM_ENC_USIG_VALUE_MASK(usig, data5, + IWL_RX_PHY_DATA5_EHT_TB_SPATIAL_REUSE2, + IEEE80211_RADIOTAP_EHT_USIG2_TB_B7_B10_SPATIAL_REUSE_2); + } else { + __le32 usig_a1 = phy_data->rx_vec[0]; + __le32 usig_a2 = phy_data->rx_vec[1]; + + IWL_MVM_ENC_USIG_VALUE_MASK(usig, usig_a1, + IWL_RX_USIG_A1_DISREGARD, + IEEE80211_RADIOTAP_EHT_USIG1_TB_B20_B25_DISREGARD); + IWL_MVM_ENC_USIG_VALUE_MASK(usig, usig_a2, + IWL_RX_USIG_A2_EHT_PPDU_TYPE, + IEEE80211_RADIOTAP_EHT_USIG2_TB_B0_B1_PPDU_TYPE); + IWL_MVM_ENC_USIG_VALUE_MASK(usig, usig_a2, + IWL_RX_USIG_A2_EHT_USIG2_VALIDATE_B2, + IEEE80211_RADIOTAP_EHT_USIG2_TB_B2_VALIDATE); + IWL_MVM_ENC_USIG_VALUE_MASK(usig, usig_a2, + IWL_RX_USIG_A2_EHT_TRIG_SPATIAL_REUSE_1, + IEEE80211_RADIOTAP_EHT_USIG2_TB_B3_B6_SPATIAL_REUSE_1); + IWL_MVM_ENC_USIG_VALUE_MASK(usig, usig_a2, + IWL_RX_USIG_A2_EHT_TRIG_SPATIAL_REUSE_2, + IEEE80211_RADIOTAP_EHT_USIG2_TB_B7_B10_SPATIAL_REUSE_2); + IWL_MVM_ENC_USIG_VALUE_MASK(usig, usig_a2, + IWL_RX_USIG_A2_EHT_TRIG_USIG2_DISREGARD, + IEEE80211_RADIOTAP_EHT_USIG2_TB_B11_B15_DISREGARD); + IWL_MVM_ENC_USIG_VALUE_MASK(usig, usig_a2, + IWL_RX_USIG_A2_EHT_CRC_OK, + IEEE80211_RADIOTAP_EHT_USIG2_TB_B16_B19_CRC); + } +} + static void iwl_mvm_decode_eht_ru(struct iwl_mvm *mvm, struct ieee80211_rx_status *rx_status, struct ieee80211_radiotap_eht *eht) @@ -1537,6 +1652,7 @@ static void iwl_mvm_decode_eht_phy_data(struct iwl_mvm *mvm, { __le32 data0 = phy_data->d0; __le32 data1 = phy_data->d1; + __le32 usig_a1 = phy_data->rx_vec[0]; u8 info_type = phy_data->info_type; /* Not in EHT range */ @@ -1547,12 +1663,21 @@ static void iwl_mvm_decode_eht_phy_data(struct iwl_mvm *mvm, usig->common |= cpu_to_le32 (IEEE80211_RADIOTAP_EHT_USIG_COMMON_UL_DL_KNOWN | IEEE80211_RADIOTAP_EHT_USIG_COMMON_BSS_COLOR_KNOWN); - usig->common |= LE32_DEC_ENC(data0, - IWL_RX_PHY_DATA0_EHT_UPLINK, - IEEE80211_RADIOTAP_EHT_USIG_COMMON_UL_DL); - usig->common |= LE32_DEC_ENC(data0, - IWL_RX_PHY_DATA0_EHT_BSS_COLOR_MASK, - IEEE80211_RADIOTAP_EHT_USIG_COMMON_BSS_COLOR_KNOWN); + if (phy_data->with_data) { + usig->common |= LE32_DEC_ENC(data0, + IWL_RX_PHY_DATA0_EHT_UPLINK, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_UL_DL); + usig->common |= LE32_DEC_ENC(data0, + IWL_RX_PHY_DATA0_EHT_BSS_COLOR_MASK, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_BSS_COLOR); + } else { + usig->common |= LE32_DEC_ENC(usig_a1, + IWL_RX_USIG_A1_UL_FLAG, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_UL_DL); + usig->common |= LE32_DEC_ENC(usig_a1, + IWL_RX_USIG_A1_BSS_COLOR, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_BSS_COLOR); + } eht->known |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_KNOWN_SPATIAL_REUSE); eht->data[0] |= LE32_DEC_ENC(data0, @@ -1571,8 +1696,13 @@ static void iwl_mvm_decode_eht_phy_data(struct iwl_mvm *mvm, iwl_mvm_decode_eht_ru(mvm, rx_status, eht); usig->common |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_USIG_COMMON_TXOP_KNOWN); - usig->common |= LE32_DEC_ENC(data0, IWL_RX_PHY_DATA0_EHT_TXOP_DUR_MASK, - IEEE80211_RADIOTAP_EHT_USIG_COMMON_TXOP); + if 
(phy_data->with_data) + usig->common |= LE32_DEC_ENC(data0, IWL_RX_PHY_DATA0_EHT_TXOP_DUR_MASK, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_TXOP); + else + usig->common |= LE32_DEC_ENC(usig_a1, IWL_RX_USIG_A1_TXOP_DURATION, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_TXOP); + eht->known |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_KNOWN_LDPC_EXTRA_SYM_OM); eht->data[0] |= LE32_DEC_ENC(data0, IWL_RX_PHY_DATA0_EHT_LDPC_EXT_SYM, IEEE80211_RADIOTAP_EHT_DATA0_LDPC_EXTRA_SYM_OM); @@ -1593,6 +1723,23 @@ static void iwl_mvm_decode_eht_phy_data(struct iwl_mvm *mvm, usig->common |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_USIG_COMMON_PHY_VER_KNOWN); usig->common |= LE32_DEC_ENC(data0, IWL_RX_PHY_DATA0_EHT_PHY_VER, IEEE80211_RADIOTAP_EHT_USIG_COMMON_PHY_VER); + + /* + * TODO: what about TB - IWL_RX_PHY_DATA1_EHT_TB_PILOT_TYPE, + * IWL_RX_PHY_DATA1_EHT_TB_LOW_SS + */ + + eht->known |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_KNOWN_EHT_LTF); + eht->data[0] |= LE32_DEC_ENC(data1, IWL_RX_PHY_DATA1_EHT_SIG_LTF_NUM, + IEEE80211_RADIOTAP_EHT_DATA0_EHT_LTF); + + if (info_type == IWL_RX_PHY_INFO_TYPE_EHT_TB_EXT || + info_type == IWL_RX_PHY_INFO_TYPE_EHT_TB) + iwl_mvm_decode_eht_ext_tb(mvm, phy_data, rx_status, eht, usig); + + if (info_type == IWL_RX_PHY_INFO_TYPE_EHT_MU_EXT || + info_type == IWL_RX_PHY_INFO_TYPE_EHT_MU) + iwl_mvm_decode_eht_ext_mu(mvm, phy_data, rx_status, eht, usig); } static void iwl_mvm_rx_eht(struct iwl_mvm *mvm, struct sk_buff *skb, @@ -2077,6 +2224,8 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi, phy_data.d1 = desc->v3.phy_data1; phy_data.d2 = desc->v3.phy_data2; phy_data.d3 = desc->v3.phy_data3; + phy_data.eht_d4 = desc->phy_eht_data4; + phy_data.d5 = desc->v3.phy_data5; } else { phy_data.rate_n_flags = le32_to_cpu(desc->v1.rate_n_flags); phy_data.channel = desc->v1.channel; @@ -2106,6 +2255,7 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi, return; } + phy_data.with_data = true; phy_data.phy_info = le16_to_cpu(desc->phy_info); phy_data.d4 = desc->phy_data4; @@ -2368,6 +2518,7 @@ void iwl_mvm_rx_monitor_no_data(struct iwl_mvm *mvm, struct napi_struct *napi, phy_data.energy_a = u32_get_bits(rssi, RX_NO_DATA_CHAIN_A_MSK); phy_data.energy_b = u32_get_bits(rssi, RX_NO_DATA_CHAIN_B_MSK); phy_data.channel = u32_get_bits(rssi, RX_NO_DATA_CHANNEL_MSK); + phy_data.with_data = false; if (iwl_fw_lookup_notif_ver(mvm->fw, DATA_PATH_GROUP, RX_NO_DATA_NOTIF, 0) < 2) { @@ -2386,6 +2537,7 @@ void iwl_mvm_rx_monitor_no_data(struct iwl_mvm *mvm, struct napi_struct *napi, sizeof(struct iwl_rx_no_data_ver_3))) /* invalid len for ver 3 */ return; + memcpy(phy_data.rx_vec, desc->rx_vec, sizeof(phy_data.rx_vec)); } else { if (format == RATE_MCS_EHT_MSK) /* no support for EHT before version 3 API */ -- GitLab From 5abf31544a4d2bcb3a0dd2fd080f9448d4026975 Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Sun, 5 Mar 2023 14:16:26 +0200 Subject: [PATCH 0203/2078] wifi: iwlwifi: mvm: add primary 80 known for EHT radiotap Calculate the position of the control channel in the wide channel based on the chandef, this is used to obtain the value of N in 802.11be D1.5 Table 9-53a in the column PHY MU/MRU index. To avoid the need to calculate every frame the value, do it once monitor vif is added. 
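The cached value this patch computes is just "which 80 MHz slice of the wide data channel contains the 20 MHz control channel", derived from the two center frequencies in the chandef. A standalone sketch of the same arithmetic; the example frequencies are hypothetical, chosen only to be internally consistent:

    #include <stdio.h>

    /* position of the 20 MHz control channel inside a 160/320 MHz data
     * channel, in 80 MHz steps; narrower channels report slice 0
     */
    static int primary_80_index(int bw_mhz, int center_freq1, int control_center)
    {
        int data_start, control_start;

        if (bw_mhz != 160 && bw_mhz != 320)
            return 0;

        data_start = center_freq1 - bw_mhz / 2; /* low edge of the data channel */
        control_start = control_center - 10;    /* low edge of the control channel */

        return (control_start - data_start) / 80;
    }

    int main(void)
    {
        /* hypothetical 320 MHz channel centered at 6265 MHz whose control
         * channel is centered at 6275 MHz: that is the third 80 MHz slice
         */
        printf("primary 80 index: %d\n", primary_80_index(320, 6265, 6275));
        return 0;
    }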
Signed-off-by: Mordechay Goodstein Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230305124407.fe9a5b58e241.I291ee480252d098f62d9ec39040284d3e521d88e@changeid Signed-off-by: Johannes Berg --- .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 27 ++++++++++++++++++- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 5 ++++ drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 9 +++++++ 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 565522466eba5..51c7652df0ef9 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -1362,6 +1362,28 @@ static void iwl_mvm_channel_switch_disconnect_wk(struct work_struct *wk) ieee80211_chswitch_done(vif, false); } +static u8 +iwl_mvm_chandef_get_primary_80(struct cfg80211_chan_def *chandef) +{ + int data_start; + int control_start; + int bw; + + if (chandef->width == NL80211_CHAN_WIDTH_320) + bw = 320; + else if (chandef->width == NL80211_CHAN_WIDTH_160) + bw = 160; + else + return 0; + + /* data is bw wide so the start is half the width */ + data_start = chandef->center_freq1 - bw / 2; + /* control is 20Mhz width */ + control_start = chandef->chan->center_freq - 10; + + return (control_start - data_start) / 80; +} + static int iwl_mvm_mac_add_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif) { @@ -1478,8 +1500,11 @@ static int iwl_mvm_mac_add_interface(struct ieee80211_hw *hw, INIT_DELAYED_WORK(&mvmvif->csa_work, iwl_mvm_channel_switch_disconnect_wk); - if (vif->type == NL80211_IFTYPE_MONITOR) + if (vif->type == NL80211_IFTYPE_MONITOR) { mvm->monitor_on = true; + mvm->monitor_p80 = + iwl_mvm_chandef_get_primary_80(&vif->bss_conf.chandef); + } iwl_mvm_vif_dbgfs_register(mvm, vif); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index ce6b701f3f4cd..301f39d9efe73 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -1096,6 +1096,11 @@ struct iwl_mvm { /* does a monitor vif exist (only one can exist hence bool) */ bool monitor_on; + /* + * primary channel position relative to he whole bandwidth, + * in steps of 80 MHz + */ + u8 monitor_p80; /* sniffer data to include in radiotap */ __le16 cur_aid; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 1334c96098a57..b545673fb150b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -1695,6 +1695,15 @@ static void iwl_mvm_decode_eht_phy_data(struct iwl_mvm *mvm, iwl_mvm_decode_eht_ru(mvm, rx_status, eht); + /* We only get here in case of IWL_RX_MPDU_PHY_TSF_OVERLOAD is set + * which is on only in case of monitor mode so no need to check monitor + * mode + */ + eht->known |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_KNOWN_PRIMARY_80); + eht->data[1] |= + le32_encode_bits(mvm->monitor_p80, + IEEE80211_RADIOTAP_EHT_DATA1_PRIMARY_80); + usig->common |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_USIG_COMMON_TXOP_KNOWN); if (phy_data->with_data) usig->common |= LE32_DEC_ENC(data0, IWL_RX_PHY_DATA0_EHT_TXOP_DUR_MASK, -- GitLab From b85f7ebb24974e016520b5ea39394ab4cb08af0c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 5 Mar 2023 14:16:27 +0200 Subject: [PATCH 0204/2078] wifi: iwlwifi: mvm: avoid UB shift of snif_queue For the old TX API we need the tfd_queue_msk, but for the new TX API we 
don't need it here because we add it to the station later. However, for the new API mvm->snif_queue is set to IWL_MVM_INVALID_QUEUE == 0xffff, so the BIT() here is undefined behaviour. Since we don't need the tfd_queue_msk value for the new TX API at all, simply fill it in only for the old API. Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230305124407.b8da0b7eb194.I53744fd7cfb6e146a9393272a2a61852841238d9@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index aa791dbc3066f..114c96ba39eeb 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -654,7 +654,7 @@ static int iwl_mvm_mac_ctxt_cmd_listener(struct iwl_mvm *mvm, u32 action) { struct iwl_mac_ctx_cmd cmd = {}; - u32 tfd_queue_msk = BIT(mvm->snif_queue); + u32 tfd_queue_msk = 0; int ret; WARN_ON(vif->type != NL80211_IFTYPE_MONITOR); @@ -669,6 +669,14 @@ static int iwl_mvm_mac_ctxt_cmd_listener(struct iwl_mvm *mvm, MAC_FILTER_ACCEPT_GRP); ieee80211_hw_set(mvm->hw, RX_INCLUDES_FCS); + /* + * the queue mask is only relevant for old TX API, and + * mvm->snif_queue isn't set here (it's still set to + * IWL_MVM_INVALID_QUEUE so the BIT() of it is UB) + */ + if (!iwl_mvm_has_new_tx_api(mvm)) + tfd_queue_msk = BIT(mvm->snif_queue); + /* Allocate sniffer station */ ret = iwl_mvm_allocate_int_sta(mvm, &mvm->snif_sta, tfd_queue_msk, vif->type, IWL_STA_GENERAL_PURPOSE); -- GitLab From f7bd883b3fca50f8429a67b4bfdf82a22e8dac53 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 5 Mar 2023 14:16:28 +0200 Subject: [PATCH 0205/2078] wifi: iwlwifi: mvm: make flush code a bit clearer The mask building here is only relevant for the old TX API, so move it into the else branch. 
Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230305124407.c0795543f254.I302124a8584dd049577b0c2c74ecd7c48ddf4f3e@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 51c7652df0ef9..ab02c6076276c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -5058,9 +5058,10 @@ static void iwl_mvm_mac_flush(struct ieee80211_hw *hw, if (iwl_mvm_flush_sta(mvm, mvmsta, false)) IWL_ERR(mvm, "flush request fail\n"); } else { - msk |= mvmsta->tfd_queue_msk; if (iwl_mvm_has_new_tx_api(mvm)) iwl_mvm_wait_sta_queues_empty(mvm, mvmsta); + else /* only used for !iwl_mvm_has_new_tx_api() below */ + msk |= mvmsta->tfd_queue_msk; } } -- GitLab From b96e516ccf9f5a227756989d4e47297f65e12e60 Mon Sep 17 00:00:00 2001 From: Golan Ben Ami Date: Sun, 5 Mar 2023 14:16:29 +0200 Subject: [PATCH 0206/2078] wifi: iwlwifi: Add support for B step of BnJ-Fm4 Support new HW step of BnJ-Fm4 device Signed-off-by: Golan Ben Ami Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230305124407.bb0591c59898.If04d7a45707ba008981f8c8ea7f7f107880f146c@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/cfg/22000.c | 12 ++++++++++++ drivers/net/wireless/intel/iwlwifi/iwl-config.h | 1 + drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 5 +++++ 3 files changed, 18 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c index 3bdd6774716dd..05720352e49f1 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c @@ -62,6 +62,7 @@ #define IWL_BZ_Z_GF_A_FW_PRE "iwlwifi-bz-z0-gf-a0-" #define IWL_BNJ_A_FM_A_FW_PRE "iwlwifi-BzBnj-a0-fm-a0-" #define IWL_BNJ_A_FM4_A_FW_PRE "iwlwifi-BzBnj-a0-fm4-a0-" +#define IWL_BNJ_B_FM4_B_FW_PRE "iwlwifi-BzBnj-b0-fm4-b0-" #define IWL_BNJ_A_GF_A_FW_PRE "iwlwifi-BzBnj-a0-gf-a0-" #define IWL_BNJ_A_GF4_A_FW_PRE "iwlwifi-BzBnj-a0-gf4-a0-" #define IWL_BNJ_A_HR_B_FW_PRE "iwlwifi-BzBnj-a0-hr-b0-" @@ -132,6 +133,8 @@ IWL_BNJ_A_FM_A_FW_PRE __stringify(api) ".ucode" #define IWL_BNJ_A_FM4_A_MODULE_FIRMWARE(api) \ IWL_BNJ_A_FM4_A_FW_PRE __stringify(api) ".ucode" +#define IWL_BNJ_B_FM4_B_MODULE_FIRMWARE(api) \ + IWL_BNJ_B_FM4_B_FW_PRE __stringify(api) ".ucode" #define IWL_BNJ_A_GF_A_MODULE_FIRMWARE(api) \ IWL_BNJ_A_GF_A_FW_PRE __stringify(api) ".ucode" #define IWL_BNJ_A_GF4_A_MODULE_FIRMWARE(api) \ @@ -998,6 +1001,14 @@ const struct iwl_cfg iwl_cfg_bnj_a0_fm4_a0 = { .num_rbds = IWL_NUM_RBDS_AX210_HE, }; +const struct iwl_cfg iwl_cfg_bnj_b0_fm4_b0 = { + .fw_name_pre = IWL_BNJ_B_FM4_B_FW_PRE, + .uhb_supported = true, + IWL_DEVICE_BZ, + .features = IWL_TX_CSUM_NETIF_FLAGS | NETIF_F_RXCSUM, + .num_rbds = IWL_NUM_RBDS_AX210_HE, +}; + const struct iwl_cfg iwl_cfg_bnj_a0_gf_a0 = { .fw_name_pre = IWL_BNJ_A_GF_A_FW_PRE, .uhb_supported = true, @@ -1059,6 +1070,7 @@ MODULE_FIRMWARE(IWL_BZ_A_FM_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_GL_A_FM_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_BNJ_A_FM_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_BNJ_A_FM4_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); +MODULE_FIRMWARE(IWL_BNJ_B_FM4_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); 
MODULE_FIRMWARE(IWL_BNJ_A_GF_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_BNJ_A_GF4_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_BNJ_A_HR_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index cfa5e1b3c3f68..eaa0ff2736c50 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -659,6 +659,7 @@ extern const struct iwl_cfg iwl_cfg_bnj_a0_gf_a0; extern const struct iwl_cfg iwl_cfg_bnj_a0_gf4_a0; extern const struct iwl_cfg iwl_cfg_bnj_a0_hr_b0; extern const struct iwl_cfg iwl_cfg_bnj_b0_fm_b0; +extern const struct iwl_cfg iwl_cfg_bnj_b0_fm4_b0; #endif /* CONFIG_IWLMVM */ #endif /* __IWL_CONFIG_H__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 99768d6a60322..8aa8a678475cb 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1193,6 +1193,11 @@ static const struct iwl_dev_info iwl_dev_info_table[] = { IWL_CFG_RF_TYPE_FM, IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_CDB, IWL_CFG_IS_JACKET, iwl_cfg_bnj_a0_fm4_a0, iwl_bz_name), + _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_MAC_TYPE_GL, SILICON_B_STEP, + IWL_CFG_RF_TYPE_FM, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_CDB, IWL_CFG_IS_JACKET, + iwl_cfg_bnj_b0_fm4_b0, iwl_bz_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_GL, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, -- GitLab From beddcdc489864e234af844d906547b1cb70e9036 Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Sun, 5 Mar 2023 14:16:30 +0200 Subject: [PATCH 0207/2078] wifi: iwlwifi: rs-fw: break out for unsupported bandwidth Currently the for loop runs also over unsupported bandwidth in the command, shorten the path in case we don't support it. Also use the right macro for setting BW20. Signed-off-by: Mordechay Goodstein Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230305124407.0264ba9df63b.I6c7c9efc806e0ffb7cb3b6051b2d109646e8708c@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c index f30eeab5505b7..e3fb1b2cea6dd 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c @@ -337,10 +337,14 @@ static void rs_fw_eht_set_enabled_rates(const struct ieee80211_sta *sta, const struct ieee80211_eht_mcs_nss_supp_bw *mcs_tx = rs_fw_rs_mcs2eht_mcs(bw, eht_tx_mcs); - /* got unsuppored index for bw */ + /* got unsupported index for bw */ if (!mcs_rx || !mcs_tx) continue; + /* break out if we don't support the bandwidth */ + if (cmd->max_ch_width < (bw + IWL_TLC_MNG_CH_WIDTH_80MHZ)) + break; + rs_fw_set_eht_mcs_nss(cmd->ht_rates, bw, MAX_NSS_MCS(9, mcs_rx, mcs_tx), GENMASK(9, 0)); rs_fw_set_eht_mcs_nss(cmd->ht_rates, bw, @@ -550,7 +554,7 @@ void rs_fw_rate_init(struct iwl_mvm *mvm, struct ieee80211_sta *sta, struct iwl_tlc_config_cmd_v4 cfg_cmd = { .sta_id = mvmsta->sta_id, .max_ch_width = update ? 
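The new break condition works because the EHT per-bandwidth MCS tables are indexed from 80 MHz (0 = 80, 1 = 160, 2 = 320) while max_ch_width counts from 20 MHz, so adding the 80 MHz enum value lines the two scales up. A small sketch of that indexing; the enum ordering below mirrors the IWL_TLC_MNG_CH_WIDTH_* names but is an assumption, not copied from the header:

    #include <stdio.h>

    /* assumed ordering of the TLC channel-width enum: 20, 40, 80, 160, 320 */
    enum tlc_ch_width { CH_WIDTH_20, CH_WIDTH_40, CH_WIDTH_80,
                        CH_WIDTH_160, CH_WIDTH_320 };

    int main(void)
    {
        enum tlc_ch_width max_ch_width = CH_WIDTH_160; /* station caps out at 160 MHz */
        const char *names[] = { "80", "160", "320" };
        int bw;

        /* EHT MCS tables: index 0 = 80 MHz, 1 = 160 MHz, 2 = 320 MHz */
        for (bw = 0; bw <= 2; bw++) {
            if (max_ch_width < bw + CH_WIDTH_80) {
                printf("stop before %s MHz\n", names[bw]);
                break;
            }
            printf("fill rates for %s MHz\n", names[bw]);
        }
        return 0;
    }

With a 160 MHz station this fills the 80 and 160 MHz entries and stops before 320 MHz instead of iterating over a table the command cannot use.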
- rs_fw_bw_from_sta_bw(sta) : RATE_MCS_CHAN_WIDTH_20, + rs_fw_bw_from_sta_bw(sta) : IWL_TLC_MNG_CH_WIDTH_20MHZ, .flags = cpu_to_le16(rs_fw_get_config_flags(mvm, sta, sband)), .chains = rs_fw_set_active_chains(iwl_mvm_get_valid_tx_ant(mvm)), .sgi_ch_width_supp = rs_fw_sgi_cw_support(sta), -- GitLab From 774302d2d3ad916f415d651362550daf22f630b7 Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Sun, 5 Mar 2023 14:16:31 +0200 Subject: [PATCH 0208/2078] wifi: iwlwifi: mvm: clean up duplicated defines VHT, HE and EHT rates use the same bits for NSS, so no need for defines per PHY version. Also use spatch to replace bit manipulation with FIELD_GET: @@ identifier rate; @@ -((rate & RATE_MCS_NSS_MSK) >> RATE_MCS_NSS_POS) +FIELD_GET(RATE_MCS_NSS_MSK, rate) Signed-off-by: Mordechay Goodstein Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230305124407.167ed9477aa8.Ibd8e71d31896e8d8f067ce4e3a6e9a0e86c78f3f@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/fw/api/rs.h | 3 --- drivers/net/wireless/intel/iwlwifi/fw/rs.c | 4 ++-- drivers/net/wireless/intel/iwlwifi/mvm/rs.c | 17 ++++++----------- drivers/net/wireless/intel/iwlwifi/mvm/rx.c | 6 ++---- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 7 +++---- 5 files changed, 13 insertions(+), 24 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/rs.h b/drivers/net/wireless/intel/iwlwifi/fw/api/rs.h index 1d372a09ebb4a..c9a48fc5fac88 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/rs.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/rs.h @@ -373,9 +373,6 @@ enum { /* Bit 4-5: (0) SISO, (1) MIMO2 (2) MIMO3 */ #define RATE_VHT_MCS_RATE_CODE_MSK 0xf -#define RATE_VHT_MCS_NSS_POS 4 -#define RATE_VHT_MCS_NSS_MSK (3 << RATE_VHT_MCS_NSS_POS) -#define RATE_VHT_MCS_MIMO2_MSK BIT(RATE_VHT_MCS_NSS_POS) /* * Legacy OFDM rate format for bits 7:0 diff --git a/drivers/net/wireless/intel/iwlwifi/fw/rs.c b/drivers/net/wireless/intel/iwlwifi/fw/rs.c index e128d2e07f38c..b09e68dbf5a93 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/rs.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/rs.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation */ #include @@ -126,7 +126,7 @@ u32 iwl_new_rate_from_v1(u32 rate_v1) rate_v1 & RATE_MCS_HE_MSK_V1) { rate_v2 |= rate_v1 & RATE_VHT_MCS_RATE_CODE_MSK; - rate_v2 |= rate_v1 & RATE_VHT_MCS_MIMO2_MSK; + rate_v2 |= rate_v1 & RATE_MCS_NSS_MSK; if (rate_v1 & RATE_MCS_HE_MSK_V1) { u32 he_type_bits = rate_v1 & RATE_MCS_HE_TYPE_MSK_V1; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c index 0b50b816684a0..1f81dff71bc47 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /****************************************************************************** * - * Copyright(c) 2005 - 2014, 2018 - 2021 Intel Corporation. All rights reserved. + * Copyright(c) 2005 - 2014, 2018 - 2022 Intel Corporation. All rights reserved. 
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH *****************************************************************************/ @@ -895,8 +895,7 @@ static int rs_rate_from_ucode_rate(const u32 ucode_rate, WARN_ON_ONCE(1); } } else if (ucode_rate & RATE_MCS_VHT_MSK_V1) { - nss = ((ucode_rate & RATE_VHT_MCS_NSS_MSK) >> - RATE_VHT_MCS_NSS_POS) + 1; + nss = FIELD_GET(RATE_MCS_NSS_MSK, ucode_rate) + 1; if (nss == 1) { rate->type = LQ_VHT_SISO; @@ -910,8 +909,7 @@ static int rs_rate_from_ucode_rate(const u32 ucode_rate, WARN_ON_ONCE(1); } } else if (ucode_rate & RATE_MCS_HE_MSK_V1) { - nss = ((ucode_rate & RATE_VHT_MCS_NSS_MSK) >> - RATE_VHT_MCS_NSS_POS) + 1; + nss = FIELD_GET(RATE_MCS_NSS_MSK, ucode_rate) + 1; if (nss == 1) { rate->type = LQ_HE_SISO; @@ -2885,8 +2883,7 @@ void iwl_mvm_update_frame_stats(struct iwl_mvm *mvm, u32 rate, bool agg) nss = ((rate & RATE_HT_MCS_NSS_MSK_V1) >> RATE_HT_MCS_NSS_POS_V1) + 1; } else if (rate & RATE_MCS_VHT_MSK_V1) { mvm->drv_rx_stats.vht_frames++; - nss = ((rate & RATE_VHT_MCS_NSS_MSK) >> - RATE_VHT_MCS_NSS_POS) + 1; + nss = FIELD_GET(RATE_MCS_NSS_MSK, rate) + 1; } else { mvm->drv_rx_stats.legacy_frames++; } @@ -3665,8 +3662,7 @@ int rs_pretty_print_rate_v1(char *buf, int bufsz, const u32 rate) if (rate & RATE_MCS_VHT_MSK_V1) { type = "VHT"; mcs = rate & RATE_VHT_MCS_RATE_CODE_MSK; - nss = ((rate & RATE_VHT_MCS_NSS_MSK) - >> RATE_VHT_MCS_NSS_POS) + 1; + nss = FIELD_GET(RATE_MCS_NSS_MSK, rate) + 1; } else if (rate & RATE_MCS_HT_MSK_V1) { type = "HT"; mcs = rate & RATE_HT_MCS_INDEX_MSK_V1; @@ -3675,8 +3671,7 @@ int rs_pretty_print_rate_v1(char *buf, int bufsz, const u32 rate) } else if (rate & RATE_MCS_HE_MSK_V1) { type = "HE"; mcs = rate & RATE_VHT_MCS_RATE_CODE_MSK; - nss = ((rate & RATE_VHT_MCS_NSS_MSK) - >> RATE_VHT_MCS_NSS_POS) + 1; + nss = FIELD_GET(RATE_MCS_NSS_MSK, rate) + 1; } else { type = "Unknown"; /* shouldn't happen */ } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c index ab9a51375c8ac..d2ce414879aa4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c @@ -253,8 +253,7 @@ static void iwl_mvm_rx_handle_tcm(struct iwl_mvm *mvm, ARRAY_SIZE(thresh_tpt))) return; thr = thresh_tpt[rate_n_flags & RATE_VHT_MCS_RATE_CODE_MSK]; - thr *= 1 + ((rate_n_flags & RATE_VHT_MCS_NSS_MSK) >> - RATE_VHT_MCS_NSS_POS); + thr *= 1 + FIELD_GET(RATE_MCS_NSS_MSK, rate_n_flags); } thr <<= ((rate_n_flags & RATE_MCS_CHAN_WIDTH_MSK_V1) >> @@ -500,8 +499,7 @@ void iwl_mvm_rx_rx_mpdu(struct iwl_mvm *mvm, struct napi_struct *napi, u8 stbc = (rate_n_flags & RATE_MCS_STBC_MSK) >> RATE_MCS_STBC_POS; rx_status->nss = - ((rate_n_flags & RATE_VHT_MCS_NSS_MSK) >> - RATE_VHT_MCS_NSS_POS) + 1; + FIELD_GET(RATE_MCS_NSS_MSK, rate_n_flags) + 1; rx_status->rate_idx = rate_n_flags & RATE_VHT_MCS_RATE_CODE_MSK; rx_status->encoding = RX_ENC_VHT; rx_status->enc_flags |= stbc << RX_ENC_FLAG_STBC_SHIFT; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 9813d7fa18007..a6d69885cd3fb 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -1396,8 +1396,8 @@ void iwl_mvm_hwrate_to_tx_rate(u32 rate_n_flags, r->idx = rate; } else if (format == RATE_MCS_VHT_MSK) { ieee80211_rate_set_vht(r, rate, - ((rate_n_flags & RATE_MCS_NSS_MSK) >> - RATE_MCS_NSS_POS) + 1); + FIELD_GET(RATE_MCS_NSS_MSK, + rate_n_flags) + 1); r->flags |= 
IEEE80211_TX_RC_VHT_MCS; } else if (format == RATE_MCS_HE_MSK) { /* mac80211 cannot do this without ieee80211_tx_status_ext() @@ -1428,8 +1428,7 @@ void iwl_mvm_hwrate_to_tx_rate_v1(u32 rate_n_flags, } else if (rate_n_flags & RATE_MCS_VHT_MSK_V1) { ieee80211_rate_set_vht( r, rate_n_flags & RATE_VHT_MCS_RATE_CODE_MSK, - ((rate_n_flags & RATE_VHT_MCS_NSS_MSK) >> - RATE_VHT_MCS_NSS_POS) + 1); + FIELD_GET(RATE_MCS_NSS_MSK, rate_n_flags) + 1); r->flags |= IEEE80211_TX_RC_VHT_MCS; } else { r->idx = iwl_mvm_legacy_rate_to_mac80211_idx(rate_n_flags, -- GitLab From 558f874ea049af3c0e2c25b4de2b7fcf3f40e43b Mon Sep 17 00:00:00 2001 From: Mukesh Sisodiya Date: Sun, 5 Mar 2023 14:16:32 +0200 Subject: [PATCH 0209/2078] wifi: iwlwifi: Update logs for yoyo reset sw changes Update the log category for the reset-fw changes. Signed-off-by: Mukesh Sisodiya Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230305124407.852a6b5f95fa.Ie67bd28da65c7e42424cacb37495930475de2dad@changeid Signed-off-by: Johannes Berg --- .../net/wireless/intel/iwlwifi/iwl-dbg-tlv.c | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c index 8e0bc1f5f6c60..87366b70b17f1 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c @@ -1218,11 +1218,12 @@ iwl_dbg_tlv_tp_trigger(struct iwl_fw_runtime *fwrt, bool sync, } fwrt->trans->dbg.restart_required = FALSE; - IWL_DEBUG_INFO(fwrt, "WRT: tp %d, reset_fw %d\n", - tp, dump_data.trig->reset_fw); - IWL_DEBUG_INFO(fwrt, "WRT: restart_required %d, last_tp_resetfw %d\n", - fwrt->trans->dbg.restart_required, - fwrt->trans->dbg.last_tp_resetfw); + IWL_DEBUG_FW(fwrt, "WRT: tp %d, reset_fw %d\n", + tp, dump_data.trig->reset_fw); + IWL_DEBUG_FW(fwrt, + "WRT: restart_required %d, last_tp_resetfw %d\n", + fwrt->trans->dbg.restart_required, + fwrt->trans->dbg.last_tp_resetfw); if (fwrt->trans->trans_cfg->device_family == IWL_DEVICE_FAMILY_9000) { @@ -1235,18 +1236,19 @@ iwl_dbg_tlv_tp_trigger(struct iwl_fw_runtime *fwrt, bool sync, IWL_DEBUG_FW(fwrt, "WRT: FW_ASSERT due to reset_fw_mode-no restart\n"); } else if (le32_to_cpu(dump_data.trig->reset_fw) == IWL_FW_INI_RESET_FW_MODE_STOP_AND_RELOAD_FW) { - IWL_DEBUG_INFO(fwrt, "WRT: stop and reload firmware\n"); + IWL_DEBUG_FW(fwrt, "WRT: stop and reload firmware\n"); fwrt->trans->dbg.restart_required = TRUE; } else if (le32_to_cpu(dump_data.trig->reset_fw) == IWL_FW_INI_RESET_FW_MODE_STOP_FW_ONLY) { - IWL_DEBUG_INFO(fwrt, "WRT: stop only and no reload firmware\n"); + IWL_DEBUG_FW(fwrt, + "WRT: stop only and no reload firmware\n"); fwrt->trans->dbg.restart_required = FALSE; fwrt->trans->dbg.last_tp_resetfw = le32_to_cpu(dump_data.trig->reset_fw); } else if (le32_to_cpu(dump_data.trig->reset_fw) == IWL_FW_INI_RESET_FW_MODE_NOTHING) { - IWL_DEBUG_INFO(fwrt, - "WRT: nothing need to be done after debug collection\n"); + IWL_DEBUG_FW(fwrt, + "WRT: nothing need to be done after debug collection\n"); } else { IWL_ERR(fwrt, "WRT: wrong resetfw %d\n", le32_to_cpu(dump_data.trig->reset_fw)); -- GitLab From 7696c07bfbded95ce22aa41b6411534d0d06990d Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Sun, 5 Mar 2023 14:16:33 +0200 Subject: [PATCH 0210/2078] wifi: iwlwifi: mvm: add EHT RU allocation to radiotap FW new API added the info missing for update RU allocation, so use the new API to update radiotap information. 
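The RU allocation fields are grouped by content channel, and the decode switch added below relies on fallthrough so that each wider bandwidth reports the fields of the narrower ones plus its own. A stand-alone sketch of that accumulation pattern follows; the enum values and the print helper are illustrative stand-ins, only the A/B/C/D labels and the per-bandwidth grouping come from the patch.

#include <stdio.h>

/* Illustrative bandwidth codes; the driver uses RATE_MCS_CHAN_WIDTH_*. */
enum bw { BW_20, BW_40, BW_80, BW_160, BW_320 };

/*
 * Print which RU-allocation labels the RX metadata carries for a given
 * bandwidth, mirroring the fallthrough switch in the patch: every wider
 * bandwidth adds fields on top of the narrower cases.
 */
static void report_ru_fields(enum bw bw)
{
	switch (bw) {
	case BW_320:
		/* A3/C3 and above exist on air but not in the RX metadata */
		/* fall through */
	case BW_160:
		printf("CC1: A2 C2   CC2: B2 D2\n");
		/* fall through */
	case BW_80:
		printf("CC1: C1      CC2: D1\n");
		/* fall through */
	case BW_40:
		printf("             CC2: B1\n");
		/* fall through */
	case BW_20:
		printf("CC1: A1\n");
		break;
	}
}

int main(void)
{
	report_ru_fields(BW_160);	/* prints all four accumulated groups */
	return 0;
}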
Signed-off-by: Mordechay Goodstein Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230305124407.b16acaa4bad1.I53afa03058dbd2cd8afbaf5e82596c8ed501a476@changeid Signed-off-by: Johannes Berg --- .../net/wireless/intel/iwlwifi/fw/api/rx.h | 15 ++-- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 90 ++++++++++++++++++- 2 files changed, 95 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h b/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h index 97e946e70279a..fdd8b01f09e4c 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h @@ -414,7 +414,7 @@ enum iwl_rx_phy_eht_data2 { /* OFDM_RX_VECTOR_COMMON_RU_ALLOC_0_OUT */ IWL_RX_PHY_DATA2_EHT_MU_EXT_RU_ALLOC_A1 = 0x000001ff, IWL_RX_PHY_DATA2_EHT_MU_EXT_RU_ALLOC_A2 = 0x0003fe00, - IWL_RX_PHY_DATA2_EHT_MU_EXT_RU_ALLOC_A3 = 0x01fc0000, + IWL_RX_PHY_DATA2_EHT_MU_EXT_RU_ALLOC_B1 = 0x07fc0000, /* info type: EHT-TB-EXT */ IWL_RX_PHY_DATA2_EHT_TB_EXT_TRIG_SIGA1 = 0xffffffff, @@ -424,19 +424,18 @@ enum iwl_rx_phy_eht_data2 { enum iwl_rx_phy_eht_data3 { /* info type: EHT-MU-EXT */ /* OFDM_RX_VECTOR_COMMON_RU_ALLOC_1_OUT */ - IWL_RX_PHY_DATA3_EHT_MU_EXT_RU_ALLOC_B1 = 0x000001ff, - IWL_RX_PHY_DATA3_EHT_MU_EXT_RU_ALLOC_B2 = 0x0003fe00, - IWL_RX_PHY_DATA3_EHT_MU_EXT_RU_ALLOC_B3 = 0x01fc0000, + IWL_RX_PHY_DATA3_EHT_MU_EXT_RU_ALLOC_B2 = 0x000001ff, + IWL_RX_PHY_DATA3_EHT_MU_EXT_RU_ALLOC_C1 = 0x0003fe00, + IWL_RX_PHY_DATA3_EHT_MU_EXT_RU_ALLOC_C2 = 0x07fc0000, }; /* goes into Metadata DW 4 */ enum iwl_rx_phy_eht_data4 { /* info type: EHT-MU-EXT */ /* OFDM_RX_VECTOR_COMMON_RU_ALLOC_2_OUT */ - IWL_RX_PHY_DATA4_EHT_MU_EXT_RU_ALLOC_C1 = 0x000001ff, - IWL_RX_PHY_DATA4_EHT_MU_EXT_RU_ALLOC_C2 = 0x0003fe00, - IWL_RX_PHY_DATA4_EHT_MU_EXT_RU_ALLOC_C3 = 0x01fc0000, - IWL_RX_PHY_DATA4_EHT_MU_EXT_SIGB_MCS = 0x18000000, + IWL_RX_PHY_DATA4_EHT_MU_EXT_RU_ALLOC_D1 = 0x000001ff, + IWL_RX_PHY_DATA4_EHT_MU_EXT_RU_ALLOC_D2 = 0x0003fe00, + IWL_RX_PHY_DATA4_EHT_MU_EXT_SIGB_MCS = 0x000c0000, }; /* goes into Metadata DW 16 */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index b545673fb150b..c07ed33395b6e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -1468,17 +1468,54 @@ static void iwl_mvm_decode_he_phy_data(struct iwl_mvm *mvm, (_usig)->value |= LE32_DEC_ENC(in_value, dec_bits, _enc_bits); \ } while (0) +#define __IWL_MVM_ENC_EHT_RU(rt_data, rt_ru, fw_data, fw_ru) \ + eht->data[(rt_data)] |= \ + (cpu_to_le32 \ + (IEEE80211_RADIOTAP_EHT_DATA ## rt_data ## _RU_ALLOC_CC_ ## rt_ru ## _KNOWN) | \ + LE32_DEC_ENC(data ## fw_data, \ + IWL_RX_PHY_DATA ## fw_data ## _EHT_MU_EXT_RU_ALLOC_ ## fw_ru, \ + IEEE80211_RADIOTAP_EHT_DATA ## rt_data ## _RU_ALLOC_CC_ ## rt_ru)) + +#define _IWL_MVM_ENC_EHT_RU(rt_data, rt_ru, fw_data, fw_ru) \ + __IWL_MVM_ENC_EHT_RU(rt_data, rt_ru, fw_data, fw_ru) + +#define IEEE80211_RADIOTAP_RU_DATA_1_1_1 1 +#define IEEE80211_RADIOTAP_RU_DATA_2_1_1 2 +#define IEEE80211_RADIOTAP_RU_DATA_1_1_2 2 +#define IEEE80211_RADIOTAP_RU_DATA_2_1_2 2 +#define IEEE80211_RADIOTAP_RU_DATA_1_2_1 3 +#define IEEE80211_RADIOTAP_RU_DATA_2_2_1 3 +#define IEEE80211_RADIOTAP_RU_DATA_1_2_2 3 +#define IEEE80211_RADIOTAP_RU_DATA_2_2_2 4 + +#define IWL_RX_RU_DATA_A1 2 +#define IWL_RX_RU_DATA_A2 2 +#define IWL_RX_RU_DATA_B1 2 +#define IWL_RX_RU_DATA_B2 3 +#define IWL_RX_RU_DATA_C1 3 +#define IWL_RX_RU_DATA_C2 3 +#define IWL_RX_RU_DATA_D1 4 +#define IWL_RX_RU_DATA_D2 4 
+ +#define IWL_MVM_ENC_EHT_RU(rt_ru, fw_ru) \ + _IWL_MVM_ENC_EHT_RU(IEEE80211_RADIOTAP_RU_DATA_ ## rt_ru, \ + rt_ru, \ + IWL_RX_RU_DATA_ ## fw_ru, \ + fw_ru) + static void iwl_mvm_decode_eht_ext_mu(struct iwl_mvm *mvm, struct iwl_mvm_rx_phy_data *phy_data, struct ieee80211_rx_status *rx_status, struct ieee80211_radiotap_eht *eht, struct ieee80211_radiotap_eht_usig *usig) { - __le32 data1 = phy_data->d1; - if (phy_data->with_data) { + __le32 data1 = phy_data->d1; + __le32 data2 = phy_data->d2; + __le32 data3 = phy_data->d3; __le32 data4 = phy_data->eht_d4; __le32 data5 = phy_data->d5; + u32 phy_bw = phy_data->rate_n_flags & RATE_MCS_CHAN_WIDTH_MSK; IWL_MVM_ENC_USIG_VALUE_MASK(usig, data5, IWL_RX_PHY_DATA5_EHT_TYPE_AND_COMP, @@ -1493,6 +1530,55 @@ static void iwl_mvm_decode_eht_ext_mu(struct iwl_mvm *mvm, (usig, data1, IWL_RX_PHY_DATA1_EHT_MU_NUM_SIG_SYM_USIGA2, IEEE80211_RADIOTAP_EHT_USIG2_MU_B11_B15_EHT_SIG_SYMBOLS); + eht->user_info[0] |= + cpu_to_le32(IEEE80211_RADIOTAP_EHT_USER_INFO_STA_ID_KNOWN) | + LE32_DEC_ENC(data5, IWL_RX_PHY_DATA5_EHT_MU_STA_ID_USR, + IEEE80211_RADIOTAP_EHT_USER_INFO_STA_ID); + + eht->known |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_KNOWN_NR_NON_OFDMA_USERS_M); + eht->data[7] |= LE32_DEC_ENC + (data5, IWL_RX_PHY_DATA5_EHT_MU_NUM_USR_NON_OFDMA, + IEEE80211_RADIOTAP_EHT_DATA7_NUM_OF_NON_OFDMA_USERS); + + /* + * Hardware labels the content channels/RU allocation values + * as follows: + * Content Channel 1 Content Channel 2 + * 20 MHz: A1 + * 40 MHz: A1 B1 + * 80 MHz: A1 C1 B1 D1 + * 160 MHz: A1 C1 A2 C2 B1 D1 B2 D2 + * 320 MHz: A1 C1 A2 C2 A3 C3 A4 C4 B1 D1 B2 D2 B3 D3 B4 D4 + * + * However firmware can only give us A1-D2, so the higher + * frequencies are missing. + */ + + switch (phy_bw) { + case RATE_MCS_CHAN_WIDTH_320: + /* additional values are missing in RX metadata */ + case RATE_MCS_CHAN_WIDTH_160: + /* content channel 1 */ + IWL_MVM_ENC_EHT_RU(1_2_1, A2); + IWL_MVM_ENC_EHT_RU(1_2_2, C2); + /* content channel 2 */ + IWL_MVM_ENC_EHT_RU(2_2_1, B2); + IWL_MVM_ENC_EHT_RU(2_2_2, D2); + fallthrough; + case RATE_MCS_CHAN_WIDTH_80: + /* content channel 1 */ + IWL_MVM_ENC_EHT_RU(1_1_2, C1); + /* content channel 2 */ + IWL_MVM_ENC_EHT_RU(2_1_2, D1); + fallthrough; + case RATE_MCS_CHAN_WIDTH_40: + /* content channel 2 */ + IWL_MVM_ENC_EHT_RU(2_1_1, B1); + fallthrough; + case RATE_MCS_CHAN_WIDTH_20: + IWL_MVM_ENC_EHT_RU(1_1_1, A1); + break; + } } else { __le32 usig_a1 = phy_data->rx_vec[0]; __le32 usig_a2 = phy_data->rx_vec[1]; -- GitLab From b55c1f4ec535f87a4063a8cbb75b014cdfc41bcb Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Sun, 5 Mar 2023 14:16:34 +0200 Subject: [PATCH 0211/2078] wifi: iwlwifi: Do not include radiotap EHT user info if not needed Do not include user information in radtiotap EHT data for EHT sounding NDP as the frame doesn't include the user specific field. Instead, encode the NSS and the beamforming information in the EHT data. 
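Concretely, the radiotap EHT TLV is now sized conditionally: the single u32 user_info slot is only reserved when the PPDU carries data, while for NDPs the NSS and beamformed bits are folded into data[7] instead. A minimal sizing sketch, with a simplified stand-in for struct ieee80211_radiotap_eht (not the real definition):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in: a fixed part followed by a flexible user_info[]. */
struct eht_tlv {
	uint32_t known;
	uint32_t data[9];
	uint32_t user_info[];
};

/* Reserve room for one user_info entry only for frames with data;
 * sounding NDPs carry no user-specific field. */
static size_t eht_tlv_len(int with_data)
{
	size_t len = sizeof(struct eht_tlv);

	if (with_data)
		len += sizeof(uint32_t);
	return len;
}

int main(void)
{
	printf("NDP TLV: %zu bytes, data-frame TLV: %zu bytes\n",
	       eht_tlv_len(0), eht_tlv_len(1));
	return 0;
}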
Signed-off-by: Ilan Peer Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230305124407.ac6474ded9bd.I9655589e9afbacc16820f35f6f5d90c6a91b8b05@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 58 ++++++++++++------- 1 file changed, 37 insertions(+), 21 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index c07ed33395b6e..2ea1fbf81d3b6 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -1845,6 +1845,7 @@ static void iwl_mvm_rx_eht(struct iwl_mvm *mvm, struct sk_buff *skb, struct ieee80211_radiotap_eht *eht; struct ieee80211_radiotap_eht_usig *usig; + size_t eht_len = sizeof(*eht); u32 rate_n_flags = phy_data->rate_n_flags; u32 he_type = rate_n_flags & RATE_MCS_HE_TYPE_MSK; @@ -1854,8 +1855,10 @@ static void iwl_mvm_rx_eht(struct iwl_mvm *mvm, struct sk_buff *skb, u32 bw; /* u32 for 1 user_info */ - eht = iwl_mvm_radiotap_put_tlv(skb, IEEE80211_RADIOTAP_EHT, - sizeof(*eht) + sizeof(u32)); + if (phy_data->with_data) + eht_len += sizeof(u32); + + eht = iwl_mvm_radiotap_put_tlv(skb, IEEE80211_RADIOTAP_EHT, eht_len); usig = iwl_mvm_radiotap_put_tlv(skb, IEEE80211_RADIOTAP_EHT_USIG, sizeof(*usig)); @@ -1946,27 +1949,40 @@ static void iwl_mvm_rx_eht(struct iwl_mvm *mvm, struct sk_buff *skb, rx_status->eht.gi)); } - eht->user_info[0] |= cpu_to_le32 - (IEEE80211_RADIOTAP_EHT_USER_INFO_MCS_KNOWN | - IEEE80211_RADIOTAP_EHT_USER_INFO_CODING_KNOWN | - IEEE80211_RADIOTAP_EHT_USER_INFO_NSS_KNOWN_O | - IEEE80211_RADIOTAP_EHT_USER_INFO_BEAMFORMING_KNOWN_O | - IEEE80211_RADIOTAP_EHT_USER_INFO_DATA_FOR_USER); - - if (rate_n_flags & RATE_MCS_BF_MSK) - eht->user_info[0] |= - cpu_to_le32(IEEE80211_RADIOTAP_EHT_USER_INFO_BEAMFORMING_O); - if (rate_n_flags & RATE_MCS_LDPC_MSK) + if (!phy_data->with_data) { + eht->known |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_KNOWN_NSS_S | + IEEE80211_RADIOTAP_EHT_KNOWN_BEAMFORMED_S); + eht->data[7] |= + le32_encode_bits(le32_get_bits(phy_data->rx_vec[2], + RX_NO_DATA_RX_VEC2_EHT_NSTS_MSK), + IEEE80211_RADIOTAP_EHT_DATA7_NSS_S); + if (rate_n_flags & RATE_MCS_BF_MSK) + eht->data[7] |= + cpu_to_le32(IEEE80211_RADIOTAP_EHT_DATA7_BEAMFORMED_S); + } else { eht->user_info[0] |= - cpu_to_le32(IEEE80211_RADIOTAP_EHT_USER_INFO_CODING); - - eht->user_info[0] |= cpu_to_le32 - (FIELD_PREP(IEEE80211_RADIOTAP_EHT_USER_INFO_MCS, - FIELD_GET(RATE_VHT_MCS_RATE_CODE_MSK, - rate_n_flags)) | - FIELD_PREP(IEEE80211_RADIOTAP_EHT_USER_INFO_NSS_O, - FIELD_GET(RATE_MCS_NSS_MSK, rate_n_flags))); + cpu_to_le32(IEEE80211_RADIOTAP_EHT_USER_INFO_MCS_KNOWN | + IEEE80211_RADIOTAP_EHT_USER_INFO_CODING_KNOWN | + IEEE80211_RADIOTAP_EHT_USER_INFO_NSS_KNOWN_O | + IEEE80211_RADIOTAP_EHT_USER_INFO_BEAMFORMING_KNOWN_O | + IEEE80211_RADIOTAP_EHT_USER_INFO_DATA_FOR_USER); + + if (rate_n_flags & RATE_MCS_BF_MSK) + eht->user_info[0] |= + cpu_to_le32(IEEE80211_RADIOTAP_EHT_USER_INFO_BEAMFORMING_O); + + if (rate_n_flags & RATE_MCS_LDPC_MSK) + eht->user_info[0] |= + cpu_to_le32(IEEE80211_RADIOTAP_EHT_USER_INFO_CODING); + + eht->user_info[0] |= cpu_to_le32 + (FIELD_PREP(IEEE80211_RADIOTAP_EHT_USER_INFO_MCS, + FIELD_GET(RATE_VHT_MCS_RATE_CODE_MSK, + rate_n_flags)) | + FIELD_PREP(IEEE80211_RADIOTAP_EHT_USER_INFO_NSS_O, + FIELD_GET(RATE_MCS_NSS_MSK, rate_n_flags))); + } } static void iwl_mvm_rx_he(struct iwl_mvm *mvm, struct sk_buff *skb, -- GitLab From da1185449c669076276027c600666286124eef9f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: 
Sun, 5 Mar 2023 14:16:35 +0200 Subject: [PATCH 0212/2078] wifi: iwlwifi: mvm: fix EOF bit reporting In monitor mode, we try to report the EOF bit on the first MPDU of an A-MPDU (hardware duplicates this bit over all MPDUs, so it's only trustable on the first). However, due to reshuffling in an ealier commit, the toggle_bit != mvm->ampdu_toggle logic can no longer work since mvm->ampdu_toggle is now set before this code runs. Fix this by tracking the first_subframe status in the phy data struct and using that instead of checking. Fixes: f1490546bec9 ("wifi: iwlwifi: mvm: rxmq: refactor mac80211 rx_status setting") Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230305124407.e273aa0d3fdc.I77db4cc247898eae8a98b80659386d6737052b95@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 28 +++++++------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 2ea1fbf81d3b6..91556d43735a7 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -1180,6 +1180,7 @@ struct iwl_mvm_rx_phy_data { __le32 d0, d1, d2, d3, eht_d4, d5; __le16 d4; bool with_data; + bool first_subframe; __le32 rx_vec[4]; u32 rate_n_flags; @@ -1885,15 +1886,10 @@ static void iwl_mvm_rx_eht(struct iwl_mvm *mvm, struct sk_buff *skb, /* update aggregation data for monitor sake on default queue */ if (!queue && (phy_info & IWL_RX_MPDU_PHY_TSF_OVERLOAD) && - (phy_info & IWL_RX_MPDU_PHY_AMPDU)) { - bool toggle_bit = phy_info & IWL_RX_MPDU_PHY_AMPDU_TOGGLE; - - /* toggle is switched whenever new aggregation starts */ - if (toggle_bit != mvm->ampdu_toggle) { - rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT_KNOWN; - if (phy_data->d0 & cpu_to_le32(IWL_RX_PHY_DATA0_EHT_DELIM_EOF)) - rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT; - } + (phy_info & IWL_RX_MPDU_PHY_AMPDU) && phy_data->first_subframe) { + rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT_KNOWN; + if (phy_data->d0 & cpu_to_le32(IWL_RX_PHY_DATA0_EHT_DELIM_EOF)) + rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT; } if (phy_info & IWL_RX_MPDU_PHY_TSF_OVERLOAD) @@ -2036,15 +2032,10 @@ static void iwl_mvm_rx_he(struct iwl_mvm *mvm, struct sk_buff *skb, /* update aggregation data for monitor sake on default queue */ if (!queue && (phy_info & IWL_RX_MPDU_PHY_TSF_OVERLOAD) && - (phy_info & IWL_RX_MPDU_PHY_AMPDU)) { - bool toggle_bit = phy_info & IWL_RX_MPDU_PHY_AMPDU_TOGGLE; - - /* toggle is switched whenever new aggregation starts */ - if (toggle_bit != mvm->ampdu_toggle) { - rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT_KNOWN; - if (phy_data->d0 & cpu_to_le32(IWL_RX_PHY_DATA0_HE_DELIM_EOF)) - rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT; - } + (phy_info & IWL_RX_MPDU_PHY_AMPDU) && phy_data->first_subframe) { + rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT_KNOWN; + if (phy_data->d0 & cpu_to_le32(IWL_RX_PHY_DATA0_EHT_DELIM_EOF)) + rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT; } if (he_type == RATE_MCS_HE_TYPE_EXT_SU && @@ -2447,6 +2438,7 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi, if (mvm->ampdu_ref == 0) mvm->ampdu_ref++; mvm->ampdu_toggle = toggle_bit; + phy_data.first_subframe = true; } rx_status->ampdu_reference = mvm->ampdu_ref; } -- GitLab From ae4fe46983007bc46d87dcb284a5e5851c3e1c84 Mon Sep 17 00:00:00 2001 From: Muhammad Husaini Zulkifli Date: Thu, 16 Feb 2023 10:07:31 +0800 Subject: [PATCH 0213/2078] igc: Add qbv_config_change_errors counter 
Add ConfigChangeError(qbv_config_change_errors) when user try to set the AdminBaseTime to past value while the current GCL is still running. The ConfigChangeError counter should not be increased when a gate control list is scheduled into the future. User can use "ethtool -S | grep qbv_config_change_errors" command to check the counter values. Signed-off-by: Muhammad Husaini Zulkifli Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc.h | 1 + drivers/net/ethernet/intel/igc/igc_ethtool.c | 1 + drivers/net/ethernet/intel/igc/igc_main.c | 1 + drivers/net/ethernet/intel/igc/igc_tsn.c | 12 ++++++++++++ 4 files changed, 15 insertions(+) diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index df3e26c0cf01a..79cc99af43174 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -185,6 +185,7 @@ struct igc_adapter { ktime_t base_time; ktime_t cycle_time; bool qbv_enable; + u32 qbv_config_change_errors; /* OS defined structs */ struct pci_dev *pdev; diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 5a26a7805ef80..0e2cb00622d1a 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -67,6 +67,7 @@ static const struct igc_stats igc_gstrings_stats[] = { IGC_STAT("rx_hwtstamp_cleared", rx_hwtstamp_cleared), IGC_STAT("tx_lpi_counter", stats.tlpic), IGC_STAT("rx_lpi_counter", stats.rlpic), + IGC_STAT("qbv_config_change_errors", qbv_config_change_errors), }; #define IGC_NETDEV_STAT(_net_stat) { \ diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 2928a6c736928..4992cca4029dd 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6049,6 +6049,7 @@ static int igc_tsn_clear_schedule(struct igc_adapter *adapter) adapter->base_time = 0; adapter->cycle_time = NSEC_PER_SEC; + adapter->qbv_config_change_errors = 0; for (i = 0; i < adapter->num_tx_queues; i++) { struct igc_ring *ring = adapter->tx_ring[i]; diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c index a386c8d61dbf9..94a2b0dfb54d4 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.c +++ b/drivers/net/ethernet/intel/igc/igc_tsn.c @@ -114,6 +114,7 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter) static int igc_tsn_enable_offload(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; + bool tsn_mode_reconfig = false; u32 tqavctrl, baset_l, baset_h; u32 sec, nsec, cycle; ktime_t base_time, systim; @@ -226,6 +227,10 @@ skip_cbs: } tqavctrl = rd32(IGC_TQAVCTRL) & ~IGC_TQAVCTRL_FUTSCDDIS; + + if (tqavctrl & IGC_TQAVCTRL_TRANSMIT_MODE_TSN) + tsn_mode_reconfig = true; + tqavctrl |= IGC_TQAVCTRL_TRANSMIT_MODE_TSN | IGC_TQAVCTRL_ENHANCED_QAV; cycle = adapter->cycle_time; @@ -239,6 +244,13 @@ skip_cbs: s64 n = div64_s64(ktime_sub_ns(systim, base_time), cycle); base_time = ktime_add_ns(base_time, (n + 1) * cycle); + + /* Increase the counter if scheduling into the past while + * Gate Control List (GCL) is running. + */ + if ((rd32(IGC_BASET_H) || rd32(IGC_BASET_L)) && + tsn_mode_reconfig) + adapter->qbv_config_change_errors++; } else { /* According to datasheet section 7.5.2.9.3.3, FutScdDis bit * has to be configured before the cycle time and base time. 
-- GitLab From 92a0dcb8427dc52eb0ec806206e2bdb23cc13840 Mon Sep 17 00:00:00 2001 From: Tan Tee Min Date: Thu, 16 Feb 2023 09:16:24 +0800 Subject: [PATCH 0214/2078] igc: offload queue max SDU from tc-taprio Add support for configuring the max SDU for each Tx queue. If not specified, keep the default. Signed-off-by: Tan Tee Min Signed-off-by: Muhammad Husaini Zulkifli Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc.h | 1 + drivers/net/ethernet/intel/igc/igc_hw.h | 1 + drivers/net/ethernet/intel/igc/igc_main.c | 33 ++++++++++++++++++++--- 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 79cc99af43174..c0c00b8bd8d89 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -99,6 +99,7 @@ struct igc_ring { u32 start_time; u32 end_time; + u32 max_sdu; /* CBS parameters */ bool cbs_enable; /* indicates if CBS is enabled */ diff --git a/drivers/net/ethernet/intel/igc/igc_hw.h b/drivers/net/ethernet/intel/igc/igc_hw.h index 88680e3d613dd..e1c572e0d4ef0 100644 --- a/drivers/net/ethernet/intel/igc/igc_hw.h +++ b/drivers/net/ethernet/intel/igc/igc_hw.h @@ -273,6 +273,7 @@ struct igc_hw_stats { u64 o2bspc; u64 b2ospc; u64 b2ogprc; + u64 txdrop; }; struct net_device *igc_get_hw_dev(struct igc_hw *hw); diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 4992cca4029dd..1e1245085a36d 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -1501,6 +1501,7 @@ static int igc_tso(struct igc_ring *tx_ring, static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, struct igc_ring *tx_ring) { + struct igc_adapter *adapter = netdev_priv(tx_ring->netdev); bool first_flag = false, insert_empty = false; u16 count = TXD_USE_COUNT(skb_headlen(skb)); __be16 protocol = vlan_get_protocol(skb); @@ -1563,9 +1564,19 @@ done: first->bytecount = skb->len; first->gso_segs = 1; - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { - struct igc_adapter *adapter = netdev_priv(tx_ring->netdev); + if (tx_ring->max_sdu > 0) { + u32 max_sdu = 0; + + max_sdu = tx_ring->max_sdu + + (skb_vlan_tagged(first->skb) ? VLAN_HLEN : 0); + if (first->bytecount > max_sdu) { + adapter->stats.txdrop++; + goto out_drop; + } + } + + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { /* FIXME: add support for retrieving timestamps from * the other timer registers before skipping the * timestamping request. 
@@ -4920,7 +4931,8 @@ void igc_update_stats(struct igc_adapter *adapter) net_stats->tx_window_errors = adapter->stats.latecol; net_stats->tx_carrier_errors = adapter->stats.tncrs; - /* Tx Dropped needs to be maintained elsewhere */ + /* Tx Dropped */ + net_stats->tx_dropped = adapter->stats.txdrop; /* Management Stats */ adapter->stats.mgptc += rd32(IGC_MGTPTC); @@ -6056,6 +6068,7 @@ static int igc_tsn_clear_schedule(struct igc_adapter *adapter) ring->start_time = 0; ring->end_time = NSEC_PER_SEC; + ring->max_sdu = 0; } return 0; @@ -6139,6 +6152,16 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, } } + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *ring = adapter->tx_ring[i]; + struct net_device *dev = adapter->netdev; + + if (qopt->max_sdu[i]) + ring->max_sdu = qopt->max_sdu[i] + dev->hard_header_len; + else + ring->max_sdu = 0; + } + return 0; } @@ -6237,8 +6260,10 @@ static int igc_tc_query_caps(struct igc_adapter *adapter, caps->broken_mqprio = true; - if (hw->mac.type == igc_i225) + if (hw->mac.type == igc_i225) { + caps->supports_queue_max_sdu = true; caps->gate_mask_per_txq = true; + } return 0; } -- GitLab From 6cc1b2fd736dbc4c846078459c3c00a0372247da Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Sun, 20 Nov 2022 10:15:11 +0200 Subject: [PATCH 0215/2078] igc: Clean up and optimize watchdog task i225/i226 parts used only one media type copper. The copper media type is not replaceable. Clean up the code accordingly, and remove the obsolete media replacement and reset options. Signed-off-by: Sasha Neftin Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc.h | 2 -- drivers/net/ethernet/intel/igc/igc_main.c | 17 ----------------- 2 files changed, 19 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index c0c00b8bd8d89..34aebf00a5123 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -294,8 +294,6 @@ extern char igc_driver_name[]; #define IGC_FLAG_PTP BIT(8) #define IGC_FLAG_WOL_SUPPORTED BIT(8) #define IGC_FLAG_NEED_LINK_UPDATE BIT(9) -#define IGC_FLAG_MEDIA_RESET BIT(10) -#define IGC_FLAG_MAS_ENABLE BIT(12) #define IGC_FLAG_HAS_MSIX BIT(13) #define IGC_FLAG_EEE BIT(14) #define IGC_FLAG_VLAN_PROMISC BIT(15) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 1e1245085a36d..2c40796c151fb 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -5578,25 +5578,8 @@ no_wait: mod_timer(&adapter->phy_info_timer, round_jiffies(jiffies + 2 * HZ)); - /* link is down, time to check for alternate media */ - if (adapter->flags & IGC_FLAG_MAS_ENABLE) { - if (adapter->flags & IGC_FLAG_MEDIA_RESET) { - schedule_work(&adapter->reset_task); - /* return immediately */ - return; - } - } pm_schedule_suspend(netdev->dev.parent, MSEC_PER_SEC * 5); - - /* also check for alternate media here */ - } else if (!netif_carrier_ok(netdev) && - (adapter->flags & IGC_FLAG_MAS_ENABLE)) { - if (adapter->flags & IGC_FLAG_MEDIA_RESET) { - schedule_work(&adapter->reset_task); - /* return immediately */ - return; - } } } -- GitLab From 3ecde2182adbb12b52b777d8fb4a599b43faf4f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20M=C3=BCller?= Date: Tue, 7 Mar 2023 21:55:04 +0000 Subject: [PATCH 0216/2078] libbpf: Fix theoretical u32 underflow in find_cd() function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
Coverity reported a potential underflow of the offset variable used in the find_cd() function. Switch to using a signed 64 bit integer for the representation of offset to make sure we can never underflow. Fixes: 1eebcb60633f ("libbpf: Implement basic zip archive parsing support") Signed-off-by: Daniel Müller Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230307215504.837321-1-deso@posteo.net --- tools/lib/bpf/zip.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/lib/bpf/zip.c b/tools/lib/bpf/zip.c index 8458c2dd0e3bc..f561aa07438f5 100644 --- a/tools/lib/bpf/zip.c +++ b/tools/lib/bpf/zip.c @@ -168,9 +168,8 @@ static int try_parse_end_of_cd(struct zip_archive *archive, __u32 offset) static int find_cd(struct zip_archive *archive) { + int64_t limit, offset; int rc = -EINVAL; - int64_t limit; - __u32 offset; if (archive->size <= sizeof(struct end_of_cd_record)) return -EINVAL; -- GitLab From 98e678e9bc5859dec605794730ad37d26789bd30 Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Tue, 7 Mar 2023 12:04:39 +0000 Subject: [PATCH 0217/2078] libbpf: Refactor parse_usdt_arg() to re-use code The parse_usdt_arg() function is defined differently for each architecture but the last part of the function is repeated verbatim for each architecture. Refactor parse_usdt_arg() to fill the arg_sz and then do the repeated post-processing in parse_usdt_spec(). Signed-off-by: Puranjay Mohan Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230307120440.25941-2-puranjay12@gmail.com --- tools/lib/bpf/usdt.c | 124 +++++++++++++++---------------------------- 1 file changed, 42 insertions(+), 82 deletions(-) diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index 75b411fc2c77b..72faa8162e346 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -1141,12 +1141,13 @@ static int parse_usdt_note(Elf *elf, const char *path, GElf_Nhdr *nhdr, return 0; } -static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg); +static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz); static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie) { + struct usdt_arg_spec *arg; const char *s; - int len; + int arg_sz, len; spec->usdt_cookie = usdt_cookie; spec->arg_cnt = 0; @@ -1159,10 +1160,25 @@ static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, return -E2BIG; } - len = parse_usdt_arg(s, spec->arg_cnt, &spec->args[spec->arg_cnt]); + arg = &spec->args[spec->arg_cnt]; + len = parse_usdt_arg(s, spec->arg_cnt, arg, &arg_sz); if (len < 0) return len; + arg->arg_signed = arg_sz < 0; + if (arg_sz < 0) + arg_sz = -arg_sz; + + switch (arg_sz) { + case 1: case 2: case 4: case 8: + arg->arg_bitshift = 64 - arg_sz * 8; + break; + default: + pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n", + spec->arg_cnt, s, arg_sz); + return -EINVAL; + } + s += len; spec->arg_cnt++; } @@ -1219,13 +1235,13 @@ static int calc_pt_regs_off(const char *reg_name) return -ENOENT; } -static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) +static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz) { char reg_name[16]; - int arg_sz, len, reg_off; + int len, reg_off; long off; - if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n", &arg_sz, &off, reg_name, &len) == 3) { + if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n", arg_sz, &off, reg_name, &len) == 3) { /* Memory dereference 
case, e.g., -4@-20(%rbp) */ arg->arg_type = USDT_ARG_REG_DEREF; arg->val_off = off; @@ -1233,7 +1249,7 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec if (reg_off < 0) return reg_off; arg->reg_off = reg_off; - } else if (sscanf(arg_str, " %d @ ( %%%15[^)] ) %n", &arg_sz, reg_name, &len) == 2) { + } else if (sscanf(arg_str, " %d @ ( %%%15[^)] ) %n", arg_sz, reg_name, &len) == 2) { /* Memory dereference case without offset, e.g., 8@(%rsp) */ arg->arg_type = USDT_ARG_REG_DEREF; arg->val_off = 0; @@ -1241,7 +1257,7 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec if (reg_off < 0) return reg_off; arg->reg_off = reg_off; - } else if (sscanf(arg_str, " %d @ %%%15s %n", &arg_sz, reg_name, &len) == 2) { + } else if (sscanf(arg_str, " %d @ %%%15s %n", arg_sz, reg_name, &len) == 2) { /* Register read case, e.g., -4@%eax */ arg->arg_type = USDT_ARG_REG; arg->val_off = 0; @@ -1250,7 +1266,7 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec if (reg_off < 0) return reg_off; arg->reg_off = reg_off; - } else if (sscanf(arg_str, " %d @ $%ld %n", &arg_sz, &off, &len) == 2) { + } else if (sscanf(arg_str, " %d @ $%ld %n", arg_sz, &off, &len) == 2) { /* Constant value case, e.g., 4@$71 */ arg->arg_type = USDT_ARG_CONST; arg->val_off = off; @@ -1260,20 +1276,6 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec return -EINVAL; } - arg->arg_signed = arg_sz < 0; - if (arg_sz < 0) - arg_sz = -arg_sz; - - switch (arg_sz) { - case 1: case 2: case 4: case 8: - arg->arg_bitshift = 64 - arg_sz * 8; - break; - default: - pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n", - arg_num, arg_str, arg_sz); - return -EINVAL; - } - return len; } @@ -1281,13 +1283,13 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec /* Do not support __s390__ for now, since user_pt_regs is broken with -m31. 
*/ -static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) +static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz) { unsigned int reg; - int arg_sz, len; + int len; long off; - if (sscanf(arg_str, " %d @ %ld ( %%r%u ) %n", &arg_sz, &off, ®, &len) == 3) { + if (sscanf(arg_str, " %d @ %ld ( %%r%u ) %n", arg_sz, &off, ®, &len) == 3) { /* Memory dereference case, e.g., -2@-28(%r15) */ arg->arg_type = USDT_ARG_REG_DEREF; arg->val_off = off; @@ -1296,7 +1298,7 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec return -EINVAL; } arg->reg_off = offsetof(user_pt_regs, gprs[reg]); - } else if (sscanf(arg_str, " %d @ %%r%u %n", &arg_sz, ®, &len) == 2) { + } else if (sscanf(arg_str, " %d @ %%r%u %n", arg_sz, ®, &len) == 2) { /* Register read case, e.g., -8@%r0 */ arg->arg_type = USDT_ARG_REG; arg->val_off = 0; @@ -1305,7 +1307,7 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec return -EINVAL; } arg->reg_off = offsetof(user_pt_regs, gprs[reg]); - } else if (sscanf(arg_str, " %d @ %ld %n", &arg_sz, &off, &len) == 2) { + } else if (sscanf(arg_str, " %d @ %ld %n", arg_sz, &off, &len) == 2) { /* Constant value case, e.g., 4@71 */ arg->arg_type = USDT_ARG_CONST; arg->val_off = off; @@ -1315,20 +1317,6 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec return -EINVAL; } - arg->arg_signed = arg_sz < 0; - if (arg_sz < 0) - arg_sz = -arg_sz; - - switch (arg_sz) { - case 1: case 2: case 4: case 8: - arg->arg_bitshift = 64 - arg_sz * 8; - break; - default: - pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n", - arg_num, arg_str, arg_sz); - return -EINVAL; - } - return len; } @@ -1348,13 +1336,13 @@ static int calc_pt_regs_off(const char *reg_name) return -ENOENT; } -static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) +static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz) { char reg_name[16]; - int arg_sz, len, reg_off; + int len, reg_off; long off; - if (sscanf(arg_str, " %d @ \[ %15[a-z0-9], %ld ] %n", &arg_sz, reg_name, &off, &len) == 3) { + if (sscanf(arg_str, " %d @ \[ %15[a-z0-9] , %ld ] %n", arg_sz, reg_name, &off, &len) == 3) { /* Memory dereference case, e.g., -4@[sp, 96] */ arg->arg_type = USDT_ARG_REG_DEREF; arg->val_off = off; @@ -1362,7 +1350,7 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec if (reg_off < 0) return reg_off; arg->reg_off = reg_off; - } else if (sscanf(arg_str, " %d @ \[ %15[a-z0-9] ] %n", &arg_sz, reg_name, &len) == 2) { + } else if (sscanf(arg_str, " %d @ \[ %15[a-z0-9] ] %n", arg_sz, reg_name, &len) == 2) { /* Memory dereference case, e.g., -4@[sp] */ arg->arg_type = USDT_ARG_REG_DEREF; arg->val_off = 0; @@ -1370,12 +1358,12 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec if (reg_off < 0) return reg_off; arg->reg_off = reg_off; - } else if (sscanf(arg_str, " %d @ %ld %n", &arg_sz, &off, &len) == 2) { + } else if (sscanf(arg_str, " %d @ %ld %n", arg_sz, &off, &len) == 2) { /* Constant value case, e.g., 4@5 */ arg->arg_type = USDT_ARG_CONST; arg->val_off = off; arg->reg_off = 0; - } else if (sscanf(arg_str, " %d @ %15[a-z0-9] %n", &arg_sz, reg_name, &len) == 2) { + } else if (sscanf(arg_str, " %d @ %15[a-z0-9] %n", arg_sz, reg_name, &len) == 2) { /* Register read case, e.g., -8@x4 */ arg->arg_type = USDT_ARG_REG; arg->val_off = 0; @@ -1388,20 +1376,6 @@ 
static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec return -EINVAL; } - arg->arg_signed = arg_sz < 0; - if (arg_sz < 0) - arg_sz = -arg_sz; - - switch (arg_sz) { - case 1: case 2: case 4: case 8: - arg->arg_bitshift = 64 - arg_sz * 8; - break; - default: - pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n", - arg_num, arg_str, arg_sz); - return -EINVAL; - } - return len; } @@ -1456,13 +1430,13 @@ static int calc_pt_regs_off(const char *reg_name) return -ENOENT; } -static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) +static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz) { char reg_name[16]; - int arg_sz, len, reg_off; + int len, reg_off; long off; - if (sscanf(arg_str, " %d @ %ld ( %15[a-z0-9] ) %n", &arg_sz, &off, reg_name, &len) == 3) { + if (sscanf(arg_str, " %d @ %ld ( %15[a-z0-9] ) %n", arg_sz, &off, reg_name, &len) == 3) { /* Memory dereference case, e.g., -8@-88(s0) */ arg->arg_type = USDT_ARG_REG_DEREF; arg->val_off = off; @@ -1470,12 +1444,12 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec if (reg_off < 0) return reg_off; arg->reg_off = reg_off; - } else if (sscanf(arg_str, " %d @ %ld %n", &arg_sz, &off, &len) == 2) { + } else if (sscanf(arg_str, " %d @ %ld %n", arg_sz, &off, &len) == 2) { /* Constant value case, e.g., 4@5 */ arg->arg_type = USDT_ARG_CONST; arg->val_off = off; arg->reg_off = 0; - } else if (sscanf(arg_str, " %d @ %15[a-z0-9] %n", &arg_sz, reg_name, &len) == 2) { + } else if (sscanf(arg_str, " %d @ %15[a-z0-9] %n", arg_sz, reg_name, &len) == 2) { /* Register read case, e.g., -8@a1 */ arg->arg_type = USDT_ARG_REG; arg->val_off = 0; @@ -1488,26 +1462,12 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec return -EINVAL; } - arg->arg_signed = arg_sz < 0; - if (arg_sz < 0) - arg_sz = -arg_sz; - - switch (arg_sz) { - case 1: case 2: case 4: case 8: - arg->arg_bitshift = 64 - arg_sz * 8; - break; - default: - pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n", - arg_num, arg_str, arg_sz); - return -EINVAL; - } - return len; } #else -static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) +static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz) { pr_warn("usdt: libbpf doesn't support USDTs on current architecture\n"); return -ENOTSUP; -- GitLab From 720d93b60aec22abcb2a5b6d8de472f3a50694b1 Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Tue, 7 Mar 2023 12:04:40 +0000 Subject: [PATCH 0218/2078] libbpf: USDT arm arg parsing support Parsing of USDT arguments is architecture-specific; on arm it is relatively easy since registers used are r[0-10], fp, ip, sp, lr, pc. Format is slightly different compared to aarch64; forms are - "size @ [ reg, #offset ]" for dereferences, for example "-8 @ [ sp, #76 ]" ; " -4 @ [ sp ]" - "size @ reg" for register values; for example "-4@r0" - "size @ #value" for raw values; for example "-8@#1" Add support for parsing USDT arguments for ARM architecture. To test the above changes QEMU's virt[1] board with cortex-a15 CPU was used. libbpf-bootstrap's usdt example[2] was modified to attach to a test program with DTRACE_PROBE1/2/3/4... probes to test different combinations. 
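The three forms can also be checked quickly in user space with plain sscanf, the same approach the parser takes. A rough sketch is below; the format strings are simplified relative to the ones added to usdt.c, and the helper name is illustrative only.

#include <stdio.h>

/* Parse the ARM USDT argument forms with sscanf, using %n to report how
 * much of the spec was consumed, as the patch does. */
static void parse(const char *spec)
{
	char reg[16];
	int sz, len;
	long off;

	if (sscanf(spec, " %d @ [ %15[a-z0-9] , #%ld ] %n", &sz, reg, &off, &len) == 3)
		printf("%-16s -> deref  size %d, reg %s, off %ld\n", spec, sz, reg, off);
	else if (sscanf(spec, " %d @ [ %15[a-z0-9] ] %n", &sz, reg, &len) == 2)
		printf("%-16s -> deref  size %d, reg %s, off 0\n", spec, sz, reg);
	else if (sscanf(spec, " %d @ #%ld %n", &sz, &off, &len) == 2)
		printf("%-16s -> const  size %d, value %ld\n", spec, sz, off);
	else if (sscanf(spec, " %d @ %15[a-z0-9] %n", &sz, reg, &len) == 2)
		printf("%-16s -> reg    size %d, reg %s\n", spec, sz, reg);
	else
		printf("%-16s -> unrecognized\n", spec);
}

int main(void)
{
	parse("-8 @ [ sp, #76 ]");
	parse("-4 @ [ sp ]");
	parse("-4@r0");
	parse("-8@#1");
	return 0;
}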
[1] https://www.qemu.org/docs/master/system/arm/virt.html [2] https://github.com/libbpf/libbpf-bootstrap/blob/master/examples/c/usdt.bpf.c Signed-off-by: Puranjay Mohan Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230307120440.25941-3-puranjay12@gmail.com --- tools/lib/bpf/usdt.c | 80 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index 72faa8162e346..b8402e3f9eb22 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -1465,6 +1465,86 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec return len; } +#elif defined(__arm__) + +static int calc_pt_regs_off(const char *reg_name) +{ + static struct { + const char *name; + size_t pt_regs_off; + } reg_map[] = { + { "r0", offsetof(struct pt_regs, uregs[0]) }, + { "r1", offsetof(struct pt_regs, uregs[1]) }, + { "r2", offsetof(struct pt_regs, uregs[2]) }, + { "r3", offsetof(struct pt_regs, uregs[3]) }, + { "r4", offsetof(struct pt_regs, uregs[4]) }, + { "r5", offsetof(struct pt_regs, uregs[5]) }, + { "r6", offsetof(struct pt_regs, uregs[6]) }, + { "r7", offsetof(struct pt_regs, uregs[7]) }, + { "r8", offsetof(struct pt_regs, uregs[8]) }, + { "r9", offsetof(struct pt_regs, uregs[9]) }, + { "r10", offsetof(struct pt_regs, uregs[10]) }, + { "fp", offsetof(struct pt_regs, uregs[11]) }, + { "ip", offsetof(struct pt_regs, uregs[12]) }, + { "sp", offsetof(struct pt_regs, uregs[13]) }, + { "lr", offsetof(struct pt_regs, uregs[14]) }, + { "pc", offsetof(struct pt_regs, uregs[15]) }, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(reg_map); i++) { + if (strcmp(reg_name, reg_map[i].name) == 0) + return reg_map[i].pt_regs_off; + } + + pr_warn("usdt: unrecognized register '%s'\n", reg_name); + return -ENOENT; +} + +static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz) +{ + char reg_name[16]; + int len, reg_off; + long off; + + if (sscanf(arg_str, " %d @ \[ %15[a-z0-9] , #%ld ] %n", + arg_sz, reg_name, &off, &len) == 3) { + /* Memory dereference case, e.g., -4@[fp, #96] */ + arg->arg_type = USDT_ARG_REG_DEREF; + arg->val_off = off; + reg_off = calc_pt_regs_off(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + } else if (sscanf(arg_str, " %d @ \[ %15[a-z0-9] ] %n", arg_sz, reg_name, &len) == 2) { + /* Memory dereference case, e.g., -4@[sp] */ + arg->arg_type = USDT_ARG_REG_DEREF; + arg->val_off = 0; + reg_off = calc_pt_regs_off(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + } else if (sscanf(arg_str, " %d @ #%ld %n", arg_sz, &off, &len) == 2) { + /* Constant value case, e.g., 4@#5 */ + arg->arg_type = USDT_ARG_CONST; + arg->val_off = off; + arg->reg_off = 0; + } else if (sscanf(arg_str, " %d @ %15[a-z0-9] %n", arg_sz, reg_name, &len) == 2) { + /* Register read case, e.g., -8@r4 */ + arg->arg_type = USDT_ARG_REG; + arg->val_off = 0; + reg_off = calc_pt_regs_off(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + } else { + pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str); + return -EINVAL; + } + + return len; +} + #else static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz) -- GitLab From 0194b64578e905dc8f112e641a71c306bd58ddde Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 6 Mar 2023 22:51:35 +0100 Subject: [PATCH 0219/2078] net: phy: improve phy_read_poll_timeout cond sometimes is (val & MASK) what may result in a false 
positive if val is a negative errno. We shouldn't evaluate cond if val < 0. This has no functional impact here, but it's not nice. Therefore switch order of the checks. Signed-off-by: Heiner Kallweit Reviewed-by: Simon Horman Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/6d8274ac-4344-23b4-d9a3-cad4c39517d4@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/linux/phy.h b/include/linux/phy.h index 36bf0bbc8efa0..fefd5091bc244 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1130,16 +1130,15 @@ static inline int phy_read(struct phy_device *phydev, u32 regnum) #define phy_read_poll_timeout(phydev, regnum, val, cond, sleep_us, \ timeout_us, sleep_before_read) \ ({ \ - int __ret = read_poll_timeout(phy_read, val, (cond) || val < 0, \ + int __ret = read_poll_timeout(phy_read, val, val < 0 || (cond), \ sleep_us, timeout_us, sleep_before_read, phydev, regnum); \ - if (val < 0) \ + if (val < 0) \ __ret = val; \ if (__ret) \ phydev_err(phydev, "%s failed: %d\n", __func__, __ret); \ __ret; \ }) - /** * __phy_read - convenience function for reading a given PHY register * @phydev: the phy_device struct -- GitLab From 40bbae583ec38ea31e728bf42a4ea72bded22ab6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Mar 2023 20:43:13 +0000 Subject: [PATCH 0220/2078] net: remove enum skb_free_reason enum skb_drop_reason is more generic, we can adopt it instead. Provide dev_kfree_skb_irq_reason() and dev_kfree_skb_any_reason(). This means drivers can use more precise drop reasons if they want to. Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Reviewed-by: Yunsheng Lin Link: https://lore.kernel.org/r/20230306204313.10492-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 18 +++++++----------- net/core/dev.c | 20 +++++++++----------- 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6a14b7b117668..ee483071cf599 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -52,6 +52,7 @@ #include #include #include +#include struct netpoll_info; struct device; @@ -3804,13 +3805,8 @@ static inline unsigned int get_netdev_rx_queue_index( int netif_get_num_default_rss_queues(void); -enum skb_free_reason { - SKB_REASON_CONSUMED, - SKB_REASON_DROPPED, -}; - -void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason); -void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason); +void dev_kfree_skb_irq_reason(struct sk_buff *skb, enum skb_drop_reason reason); +void dev_kfree_skb_any_reason(struct sk_buff *skb, enum skb_drop_reason reason); /* * It is not allowed to call kfree_skb() or consume_skb() from hardware @@ -3833,22 +3829,22 @@ void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason); */ static inline void dev_kfree_skb_irq(struct sk_buff *skb) { - __dev_kfree_skb_irq(skb, SKB_REASON_DROPPED); + dev_kfree_skb_irq_reason(skb, SKB_DROP_REASON_NOT_SPECIFIED); } static inline void dev_consume_skb_irq(struct sk_buff *skb) { - __dev_kfree_skb_irq(skb, SKB_REASON_CONSUMED); + dev_kfree_skb_irq_reason(skb, SKB_CONSUMED); } static inline void dev_kfree_skb_any(struct sk_buff *skb) { - __dev_kfree_skb_any(skb, SKB_REASON_DROPPED); + dev_kfree_skb_any_reason(skb, SKB_DROP_REASON_NOT_SPECIFIED); } static inline void dev_consume_skb_any(struct sk_buff *skb) { - __dev_kfree_skb_any(skb, SKB_REASON_CONSUMED); + 
dev_kfree_skb_any_reason(skb, SKB_CONSUMED); } u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp, diff --git a/net/core/dev.c b/net/core/dev.c index 253584777101f..c7853192563d2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3075,7 +3075,7 @@ void __netif_schedule(struct Qdisc *q) EXPORT_SYMBOL(__netif_schedule); struct dev_kfree_skb_cb { - enum skb_free_reason reason; + enum skb_drop_reason reason; }; static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb) @@ -3108,7 +3108,7 @@ void netif_tx_wake_queue(struct netdev_queue *dev_queue) } EXPORT_SYMBOL(netif_tx_wake_queue); -void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason) +void dev_kfree_skb_irq_reason(struct sk_buff *skb, enum skb_drop_reason reason) { unsigned long flags; @@ -3128,18 +3128,16 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason) raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); } -EXPORT_SYMBOL(__dev_kfree_skb_irq); +EXPORT_SYMBOL(dev_kfree_skb_irq_reason); -void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason) +void dev_kfree_skb_any_reason(struct sk_buff *skb, enum skb_drop_reason reason) { if (in_hardirq() || irqs_disabled()) - __dev_kfree_skb_irq(skb, reason); - else if (unlikely(reason == SKB_REASON_DROPPED)) - kfree_skb(skb); + dev_kfree_skb_irq_reason(skb, reason); else - consume_skb(skb); + kfree_skb_reason(skb, reason); } -EXPORT_SYMBOL(__dev_kfree_skb_any); +EXPORT_SYMBOL(dev_kfree_skb_any_reason); /** @@ -5020,11 +5018,11 @@ static __latent_entropy void net_tx_action(struct softirq_action *h) clist = clist->next; WARN_ON(refcount_read(&skb->users)); - if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED)) + if (likely(get_kfree_skb_cb(skb)->reason == SKB_CONSUMED)) trace_consume_skb(skb, net_tx_action); else trace_kfree_skb(skb, net_tx_action, - SKB_DROP_REASON_NOT_SPECIFIED); + get_kfree_skb_cb(skb)->reason); if (skb->fclone != SKB_FCLONE_UNAVAILABLE) __kfree_skb(skb); -- GitLab From 4310e2f42030ffda0997fb87e2ebdd9711030218 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 6 Mar 2023 23:10:57 +0100 Subject: [PATCH 0221/2078] net: phy: smsc: simplify lan95xx_config_aneg_ext lan95xx_config_aneg_ext() can be simplified by using phy_set_bits(). Signed-off-by: Heiner Kallweit Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/3da785c7-3ef8-b5d3-89a0-340f550be3c2@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/smsc.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index ac7481ce2fc16..af89f3ef1c4fe 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -178,18 +178,15 @@ static int lan87xx_config_aneg(struct phy_device *phydev) static int lan95xx_config_aneg_ext(struct phy_device *phydev) { - int rc; - - if (phydev->phy_id != 0x0007c0f0) /* not (LAN9500A or LAN9505A) */ - return lan87xx_config_aneg(phydev); + if (phydev->phy_id == 0x0007c0f0) { /* LAN9500A or LAN9505A */ + /* Extend Manual AutoMDIX timer */ + int rc = phy_set_bits(phydev, PHY_EDPD_CONFIG, + PHY_EDPD_CONFIG_EXT_CROSSOVER_); - /* Extend Manual AutoMDIX timer */ - rc = phy_read(phydev, PHY_EDPD_CONFIG); - if (rc < 0) - return rc; + if (rc < 0) + return rc; + } - rc |= PHY_EDPD_CONFIG_EXT_CROSSOVER_; - phy_write(phydev, PHY_EDPD_CONFIG, rc); return lan87xx_config_aneg(phydev); } -- GitLab From 2549347972a8e94a9261c125e494955d873b1a9f Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. 
Silva" Date: Mon, 6 Mar 2023 17:40:28 -0600 Subject: [PATCH 0222/2078] netxen_nic: Replace fake flex-array with flexible-array member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Zero-length arrays as fake flexible arrays are deprecated and we are moving towards adopting C99 flexible-array members instead. Transform zero-length array into flexible-array member in struct nx_cardrsp_rx_ctx_t. Address the following warnings found with GCC-13 and -fstrict-flex-arrays=3 enabled: drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c:361:26: warning: array subscript is outside array bounds of ‘char[0]’ [-Warray-bounds=] drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c:372:25: warning: array subscript is outside array bounds of ‘char[0]’ [-Warray-bounds=] This helps with the ongoing efforts to tighten the FORTIFY_SOURCE routines on memcpy() and help us make progress towards globally enabling -fstrict-flex-arrays=3 [1]. Link: https://github.com/KSPP/linux/issues/21 Link: https://github.com/KSPP/linux/issues/265 Link: https://gcc.gnu.org/pipermail/gcc-patches/2022-October/602902.html [1] Signed-off-by: Gustavo A. R. Silva Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/ZAZ57I6WdQEwWh7v@work Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/netxen/netxen_nic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h index f13fa7396aef1..3d36d23df0c6e 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h @@ -854,7 +854,7 @@ typedef struct { The following is packed: - N cardrsp_rds_rings - N cardrs_sds_rings */ - char data[0]; + char data[]; } nx_cardrsp_rx_ctx_t; #define SIZEOF_HOSTRQ_RX(HOSTRQ_RX, rds_rings, sds_rings) \ -- GitLab From 8ca5a5790b9a1ce147484d2a2c4e66d2553f3d6c Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Mon, 6 Mar 2023 08:07:38 -0800 Subject: [PATCH 0223/2078] net-timestamp: extend SOF_TIMESTAMPING_OPT_ID to HW timestamps When the feature was added it was enabled for SW timestamps only but with current hardware the same out-of-order timestamps can be seen. Let's expand the area for the feature to all types of timestamps. Signed-off-by: Vadim Fedorenko Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 2 +- net/ipv6/ip6_output.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 4e4e308c3230a..e7bef36ce26f5 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -990,7 +990,7 @@ static int __ip_append_data(struct sock *sk, mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize; paged = !!cork->gso_size; - if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP && + if (cork->tx_flags & SKBTX_ANY_TSTAMP && sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) tskey = atomic_inc_return(&sk->sk_tskey) - 1; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index c314fdde0097c..4ce3f9d3bc8ac 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1500,7 +1500,7 @@ static int __ip6_append_data(struct sock *sk, mtu = cork->gso_size ? 
IP6_MAX_MTU : cork->fragsize; orig_mtu = mtu; - if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP && + if (cork->tx_flags & SKBTX_ANY_TSTAMP && sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) tskey = atomic_inc_return(&sk->sk_tskey) - 1; -- GitLab From 91c8643578a21e435c412ffbe902bb4b4773e262 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 6 Mar 2023 22:23:15 +0100 Subject: [PATCH 0224/2078] r8169: use spinlock to protect mac ocp register access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For disabling ASPM during NAPI poll we'll have to access mac ocp registers in atomic context. This could result in races because a mac ocp read consists of a write to register OCPDR, followed by a read from the same register. Therefore add a spinlock to protect access to mac ocp registers. Reviewed-by: Simon Horman Tested-by: Kai-Heng Feng Tested-by: Holger Hoffstätte Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169_main.c | 37 ++++++++++++++++++++--- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 45147a1016bec..259eac5b06166 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -613,6 +613,8 @@ struct rtl8169_private { struct work_struct work; } wk; + spinlock_t mac_ocp_lock; + unsigned supports_gmii:1; unsigned aspm_manageable:1; dma_addr_t counters_phys_addr; @@ -847,7 +849,7 @@ static int r8168_phy_ocp_read(struct rtl8169_private *tp, u32 reg) (RTL_R32(tp, GPHY_OCP) & 0xffff) : -ETIMEDOUT; } -static void r8168_mac_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data) +static void __r8168_mac_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data) { if (rtl_ocp_reg_failure(reg)) return; @@ -855,7 +857,16 @@ static void r8168_mac_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data) RTL_W32(tp, OCPDR, OCPAR_FLAG | (reg << 15) | data); } -static u16 r8168_mac_ocp_read(struct rtl8169_private *tp, u32 reg) +static void r8168_mac_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data) +{ + unsigned long flags; + + spin_lock_irqsave(&tp->mac_ocp_lock, flags); + __r8168_mac_ocp_write(tp, reg, data); + spin_unlock_irqrestore(&tp->mac_ocp_lock, flags); +} + +static u16 __r8168_mac_ocp_read(struct rtl8169_private *tp, u32 reg) { if (rtl_ocp_reg_failure(reg)) return 0; @@ -865,12 +876,28 @@ static u16 r8168_mac_ocp_read(struct rtl8169_private *tp, u32 reg) return RTL_R32(tp, OCPDR); } +static u16 r8168_mac_ocp_read(struct rtl8169_private *tp, u32 reg) +{ + unsigned long flags; + u16 val; + + spin_lock_irqsave(&tp->mac_ocp_lock, flags); + val = __r8168_mac_ocp_read(tp, reg); + spin_unlock_irqrestore(&tp->mac_ocp_lock, flags); + + return val; +} + static void r8168_mac_ocp_modify(struct rtl8169_private *tp, u32 reg, u16 mask, u16 set) { - u16 data = r8168_mac_ocp_read(tp, reg); + unsigned long flags; + u16 data; - r8168_mac_ocp_write(tp, reg, (data & ~mask) | set); + spin_lock_irqsave(&tp->mac_ocp_lock, flags); + data = __r8168_mac_ocp_read(tp, reg); + __r8168_mac_ocp_write(tp, reg, (data & ~mask) | set); + spin_unlock_irqrestore(&tp->mac_ocp_lock, flags); } /* Work around a hw issue with RTL8168g PHY, the quirk disables @@ -5176,6 +5203,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) tp->eee_adv = -1; tp->ocp_base = OCP_STD_PHY_BASE; + spin_lock_init(&tp->mac_ocp_lock); + dev->tstats = 
devm_netdev_alloc_pcpu_stats(&pdev->dev, struct pcpu_sw_netstats); if (!dev->tstats) -- GitLab From 6bc6c4e6893ee79a9862c61d1635e7da6d5a3333 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 6 Mar 2023 22:24:00 +0100 Subject: [PATCH 0225/2078] r8169: use spinlock to protect access to registers Config2 and Config5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For disabling ASPM during NAPI poll we'll have to access both registers in atomic context. Use a spinlock to protect access. Reviewed-by: Simon Horman Tested-by: Kai-Heng Feng Tested-by: Holger Hoffstätte Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169_main.c | 47 ++++++++++++++++++----- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 259eac5b06166..e6f3f19474889 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -613,6 +613,7 @@ struct rtl8169_private { struct work_struct work; } wk; + spinlock_t config25_lock; spinlock_t mac_ocp_lock; unsigned supports_gmii:1; @@ -677,6 +678,28 @@ static void rtl_pci_commit(struct rtl8169_private *tp) RTL_R8(tp, ChipCmd); } +static void rtl_mod_config2(struct rtl8169_private *tp, u8 clear, u8 set) +{ + unsigned long flags; + u8 val; + + spin_lock_irqsave(&tp->config25_lock, flags); + val = RTL_R8(tp, Config2); + RTL_W8(tp, Config2, (val & ~clear) | set); + spin_unlock_irqrestore(&tp->config25_lock, flags); +} + +static void rtl_mod_config5(struct rtl8169_private *tp, u8 clear, u8 set) +{ + unsigned long flags; + u8 val; + + spin_lock_irqsave(&tp->config25_lock, flags); + val = RTL_R8(tp, Config5); + RTL_W8(tp, Config5, (val & ~clear) | set); + spin_unlock_irqrestore(&tp->config25_lock, flags); +} + static bool rtl_is_8125(struct rtl8169_private *tp) { return tp->mac_version >= RTL_GIGA_MAC_VER_61; @@ -1363,6 +1386,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) { WAKE_MAGIC, Config3, MagicPacket } }; unsigned int i, tmp = ARRAY_SIZE(cfg); + unsigned long flags; u8 options; rtl_unlock_config_regs(tp); @@ -1381,12 +1405,14 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) r8168_mac_ocp_modify(tp, 0xc0b6, BIT(0), 0); } + spin_lock_irqsave(&tp->config25_lock, flags); for (i = 0; i < tmp; i++) { options = RTL_R8(tp, cfg[i].reg) & ~cfg[i].mask; if (wolopts & cfg[i].opt) options |= cfg[i].mask; RTL_W8(tp, cfg[i].reg, options); } + spin_unlock_irqrestore(&tp->config25_lock, flags); switch (tp->mac_version) { case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_06: @@ -1398,10 +1424,10 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) case RTL_GIGA_MAC_VER_34: case RTL_GIGA_MAC_VER_37: case RTL_GIGA_MAC_VER_39 ... 
RTL_GIGA_MAC_VER_63: - options = RTL_R8(tp, Config2) & ~PME_SIGNAL; if (wolopts) - options |= PME_SIGNAL; - RTL_W8(tp, Config2, options); + rtl_mod_config2(tp, 0, PME_SIGNAL); + else + rtl_mod_config2(tp, PME_SIGNAL, 0); break; default: break; @@ -2704,8 +2730,8 @@ static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable) { /* Don't enable ASPM in the chip if OS can't control ASPM */ if (enable && tp->aspm_manageable) { - RTL_W8(tp, Config5, RTL_R8(tp, Config5) | ASPM_en); - RTL_W8(tp, Config2, RTL_R8(tp, Config2) | ClkReqEn); + rtl_mod_config5(tp, 0, ASPM_en); + rtl_mod_config2(tp, 0, ClkReqEn); switch (tp->mac_version) { case RTL_GIGA_MAC_VER_46 ... RTL_GIGA_MAC_VER_48: @@ -2728,8 +2754,8 @@ static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable) break; } - RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn); - RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en); + rtl_mod_config2(tp, ClkReqEn, 0); + rtl_mod_config5(tp, ASPM_en, 0); } udelay(10); @@ -2890,7 +2916,7 @@ static void rtl_hw_start_8168e_1(struct rtl8169_private *tp) RTL_W32(tp, MISC, RTL_R32(tp, MISC) | TXPLA_RST); RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~TXPLA_RST); - RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~Spi_en); + rtl_mod_config5(tp, Spi_en, 0); } static void rtl_hw_start_8168e_2(struct rtl8169_private *tp) @@ -2923,7 +2949,7 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp) RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN); RTL_W32(tp, MISC, RTL_R32(tp, MISC) | PWM_EN); - RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~Spi_en); + rtl_mod_config5(tp, Spi_en, 0); rtl_hw_aspm_clkreq_enable(tp, true); } @@ -2946,7 +2972,7 @@ static void rtl_hw_start_8168f(struct rtl8169_private *tp) RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB); RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN); RTL_W32(tp, MISC, RTL_R32(tp, MISC) | PWM_EN); - RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~Spi_en); + rtl_mod_config5(tp, Spi_en, 0); rtl8168_config_eee_mac(tp); } @@ -5203,6 +5229,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) tp->eee_adv = -1; tp->ocp_base = OCP_STD_PHY_BASE; + spin_lock_init(&tp->config25_lock); spin_lock_init(&tp->mac_ocp_lock); dev->tstats = devm_netdev_alloc_pcpu_stats(&pdev->dev, -- GitLab From 59ee97c0c1a8d0dadc092897ca7d5fe3a80e1bc3 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 6 Mar 2023 22:24:49 +0100 Subject: [PATCH 0226/2078] r8169: enable cfg9346 config register access in atomic context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For disabling ASPM during NAPI poll we'll have to unlock access to the config registers in atomic context. Other code parts running with config register access unlocked are partially longer and can sleep. Add a usage counter to enable parallel execution of code parts requiring unlocked config registers. Reviewed-by: Simon Horman Tested-by: Kai-Heng Feng Tested-by: Holger Hoffstätte Signed-off-by: Heiner Kallweit Signed-off-by: David S. 
Miller --- drivers/net/ethernet/realtek/r8169_main.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index e6f3f19474889..61cbf498fd077 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -616,6 +616,9 @@ struct rtl8169_private { spinlock_t config25_lock; spinlock_t mac_ocp_lock; + spinlock_t cfg9346_usage_lock; + int cfg9346_usage_count; + unsigned supports_gmii:1; unsigned aspm_manageable:1; dma_addr_t counters_phys_addr; @@ -664,12 +667,22 @@ static inline struct device *tp_to_dev(struct rtl8169_private *tp) static void rtl_lock_config_regs(struct rtl8169_private *tp) { - RTL_W8(tp, Cfg9346, Cfg9346_Lock); + unsigned long flags; + + spin_lock_irqsave(&tp->cfg9346_usage_lock, flags); + if (!--tp->cfg9346_usage_count) + RTL_W8(tp, Cfg9346, Cfg9346_Lock); + spin_unlock_irqrestore(&tp->cfg9346_usage_lock, flags); } static void rtl_unlock_config_regs(struct rtl8169_private *tp) { - RTL_W8(tp, Cfg9346, Cfg9346_Unlock); + unsigned long flags; + + spin_lock_irqsave(&tp->cfg9346_usage_lock, flags); + if (!tp->cfg9346_usage_count++) + RTL_W8(tp, Cfg9346, Cfg9346_Unlock); + spin_unlock_irqrestore(&tp->cfg9346_usage_lock, flags); } static void rtl_pci_commit(struct rtl8169_private *tp) @@ -5229,6 +5242,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) tp->eee_adv = -1; tp->ocp_base = OCP_STD_PHY_BASE; + spin_lock_init(&tp->cfg9346_usage_lock); spin_lock_init(&tp->config25_lock); spin_lock_init(&tp->mac_ocp_lock); -- GitLab From 49ef7d846d4bd77b0b9f1f801fc765b004690a07 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 6 Mar 2023 22:25:49 +0100 Subject: [PATCH 0227/2078] r8169: prepare rtl_hw_aspm_clkreq_enable for usage in atomic context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bail out if the function is used with chip versions that don't support ASPM configuration. In addition, remove the delay; it turned out that it's not needed, and the vendor r8125 driver doesn't have it either. Suggested-by: Kai-Heng Feng Reviewed-by: Simon Horman Tested-by: Kai-Heng Feng Tested-by: Holger Hoffstätte Signed-off-by: Heiner Kallweit Signed-off-by: David S.
Miller --- drivers/net/ethernet/realtek/r8169_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 61cbf498fd077..96af31aeab0ab 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2741,6 +2741,9 @@ static void rtl_disable_exit_l1(struct rtl8169_private *tp) static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable) { + if (tp->mac_version < RTL_GIGA_MAC_VER_32) + return; + /* Don't enable ASPM in the chip if OS can't control ASPM */ if (enable && tp->aspm_manageable) { rtl_mod_config5(tp, 0, ASPM_en); @@ -2770,8 +2773,6 @@ static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable) rtl_mod_config2(tp, ClkReqEn, 0); rtl_mod_config5(tp, ASPM_en, 0); } - - udelay(10); } static void rtl_set_fifo_size(struct rtl8169_private *tp, u16 rx_stat, -- GitLab From e1d597ecbe5c5d7128630d4c9ec27c42018613ed Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 6 Mar 2023 22:26:47 +0100 Subject: [PATCH 0228/2078] r8169: disable ASPM during NAPI poll MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Several chip versions have problems with ASPM, which may result in rx_missed errors or tx timeouts. The root cause isn't known, but experience shows that disabling ASPM during NAPI poll can avoid these problems. Suggested-by: Kai-Heng Feng Reviewed-by: Simon Horman Tested-by: Kai-Heng Feng Tested-by: Holger Hoffstätte Signed-off-by: Heiner Kallweit Signed-off-by: David S.
Miller --- drivers/net/ethernet/realtek/r8169_main.c | 27 +---------------------- 1 file changed, 1 insertion(+), 26 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 2897b9bf29af6..6563e4c6a1360 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -620,7 +620,6 @@ struct rtl8169_private { int cfg9346_usage_count; unsigned supports_gmii:1; - unsigned aspm_manageable:1; dma_addr_t counters_phys_addr; struct rtl8169_counters *counters; struct rtl8169_tc_offsets tc_offset; @@ -2744,8 +2743,7 @@ static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable) if (tp->mac_version < RTL_GIGA_MAC_VER_32) return; - /* Don't enable ASPM in the chip if OS can't control ASPM */ - if (enable && tp->aspm_manageable) { + if (enable) { rtl_mod_config5(tp, 0, ASPM_en); rtl_mod_config2(tp, 0, ClkReqEn); @@ -5221,16 +5219,6 @@ done: rtl_rar_set(tp, mac_addr); } -/* register is set if system vendor successfully tested ASPM 1.2 */ -static bool rtl_aspm_is_safe(struct rtl8169_private *tp) -{ - if (tp->mac_version >= RTL_GIGA_MAC_VER_61 && - r8168_mac_ocp_read(tp, 0xc0b2) & 0xf) - return true; - - return false; -} - static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { struct rtl8169_private *tp; @@ -5302,19 +5290,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) tp->mac_version = chipset; - /* Disable ASPM L1 as that cause random device stop working - * problems as well as full system hangs for some PCIe devices users. - * Chips from RTL8168h partially have issues with L1.2, but seem - * to work fine with L1 and L1.1. - */ - if (rtl_aspm_is_safe(tp)) - rc = 0; - else if (tp->mac_version >= RTL_GIGA_MAC_VER_46) - rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1_2); - else - rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1); - tp->aspm_manageable = !rc; - tp->dash_type = rtl_check_dash(tp); tp->cp_cmd = RTL_R16(tp, CPlusCmd) & CPCMD_MASK; -- GitLab From 966b6b809f900b6c41f3b559fd57ec567d735dc9 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 6 Mar 2023 17:51:52 -0600 Subject: [PATCH 0230/2078] net/mlx4_en: Replace fake flex-array with flexible-array member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Zero-length arrays as fake flexible arrays are deprecated and we are moving towards adopting C99 flexible-array members instead. Transform zero-length array into flexible-array member in struct mlx4_en_rx_desc. 
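(For illustration only, not part of the patch: a minimal sketch of the general shape of this conversion, using placeholder names rather than the real mlx4 structures; the actual one-line change to struct mlx4_en_rx_desc appears in the diff below. The DECLARE_FLEX_ARRAY() helper from <linux/stddef.h> is used instead of a bare "data[]" because ISO C does not allow a flexible-array member to be the only named member of a structure.)

#include <linux/stddef.h>	/* DECLARE_FLEX_ARRAY() */
#include <linux/types.h>

struct example_data_seg {
	__be32 byte_count;
	__be32 lkey;
	__be64 addr;
};

/* Before: deprecated zero-length "fake" flexible array. */
struct example_rx_desc_old {
	struct example_data_seg data[0];
};

/* After: a real C99 flexible-array member; the helper keeps the struct
 * valid even though "data" is its only member.
 */
struct example_rx_desc {
	DECLARE_FLEX_ARRAY(struct example_data_seg, data);
};
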
Address the following warnings found with GCC-13 and -fstrict-flex-arrays=3 enabled: drivers/net/ethernet/mellanox/mlx4/en_rx.c:88:30: warning: array subscript i is outside array bounds of ‘struct mlx4_wqe_data_seg[0]’ [-Warray-bounds=] drivers/net/ethernet/mellanox/mlx4/en_rx.c:149:30: warning: array subscript 0 is outside array bounds of ‘struct mlx4_wqe_data_seg[0]’ [-Warray-bounds=] drivers/net/ethernet/mellanox/mlx4/en_rx.c:127:30: warning: array subscript i is outside array bounds of ‘struct mlx4_wqe_data_seg[0]’ [-Warray-bounds=] drivers/net/ethernet/mellanox/mlx4/en_rx.c:128:30: warning: array subscript i is outside array bounds of ‘struct mlx4_wqe_data_seg[0]’ [-Warray-bounds=] drivers/net/ethernet/mellanox/mlx4/en_rx.c:129:30: warning: array subscript i is outside array bounds of ‘struct mlx4_wqe_data_seg[0]’ [-Warray-bounds=] drivers/net/ethernet/mellanox/mlx4/en_rx.c:117:30: warning: array subscript i is outside array bounds of ‘struct mlx4_wqe_data_seg[0]’ [-Warray-bounds=] drivers/net/ethernet/mellanox/mlx4/en_rx.c:119:30: warning: array subscript i is outside array bounds of ‘struct mlx4_wqe_data_seg[0]’ [-Warray-bounds=] This helps with the ongoing efforts to tighten the FORTIFY_SOURCE routines on memcpy() and help us make progress towards globally enabling -fstrict-flex-arrays=3 [1]. Link: https://github.com/KSPP/linux/issues/21 Link: https://github.com/KSPP/linux/issues/264 Link: https://gcc.gnu.org/pipermail/gcc-patches/2022-October/602902.html [1] Signed-off-by: Gustavo A. R. Silva Reviewed-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 544e09b97483c..034733b13b1a1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -323,7 +323,7 @@ struct mlx4_en_tx_ring { struct mlx4_en_rx_desc { /* actual number of entries depends on rx ring stride */ - struct mlx4_wqe_data_seg data[0]; + DECLARE_FLEX_ARRAY(struct mlx4_wqe_data_seg, data); }; struct mlx4_en_rx_ring { -- GitLab From ecf729f93bd5a2117ef82d1e281bfc780c643e36 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Tue, 7 Mar 2023 13:41:38 +0800 Subject: [PATCH 0231/2078] emulex/benet: clean up some inconsistent indenting No functional modification involved. drivers/net/ethernet/emulex/benet/be_cmds.c:1120 be_cmd_pmac_add() warn: inconsistent indenting. Reported-by: Abaci Robot Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=4396 Signed-off-by: Jiapeng Chong Signed-off-by: David S. 
Miller --- drivers/net/ethernet/emulex/benet/be_cmds.c | 27 +++++++++++---------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 08ec84cd21c04..61adcebeef010 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -135,7 +135,8 @@ static int be_mcc_notify(struct be_adapter *adapter) /* To check if valid bit is set, check the entire word as we don't know * the endianness of the data (old entry is host endian while a new entry is - * little endian) */ + * little endian) + */ static inline bool be_mcc_compl_is_new(struct be_mcc_compl *compl) { u32 flags; @@ -248,7 +249,8 @@ static int be_mcc_compl_process(struct be_adapter *adapter, u8 opcode = 0, subsystem = 0; /* Just swap the status to host endian; mcc tag is opaquely copied - * from mcc_wrb */ + * from mcc_wrb + */ be_dws_le_to_cpu(compl, 4); base_status = base_status(compl->status); @@ -657,8 +659,7 @@ static int be_mbox_db_ready_wait(struct be_adapter *adapter, void __iomem *db) return 0; } -/* - * Insert the mailbox address into the doorbell in two steps +/* Insert the mailbox address into the doorbell in two steps * Polls on the mbox doorbell till a command completion (or a timeout) occurs */ static int be_mbox_notify_wait(struct be_adapter *adapter) @@ -802,7 +803,7 @@ static void be_wrb_cmd_hdr_prepare(struct be_cmd_req_hdr *req_hdr, req_hdr->subsystem = subsystem; req_hdr->request_length = cpu_to_le32(cmd_len - sizeof(*req_hdr)); req_hdr->version = 0; - fill_wrb_tags(wrb, (ulong) req_hdr); + fill_wrb_tags(wrb, (ulong)req_hdr); wrb->payload_length = cmd_len; if (mem) { wrb->embedded |= (1 & MCC_WRB_SGE_CNT_MASK) << @@ -832,8 +833,8 @@ static void be_cmd_page_addrs_prepare(struct phys_addr *pages, u32 max_pages, static inline struct be_mcc_wrb *wrb_from_mbox(struct be_adapter *adapter) { struct be_dma_mem *mbox_mem = &adapter->mbox_mem; - struct be_mcc_wrb *wrb - = &((struct be_mcc_mailbox *)(mbox_mem->va))->wrb; + struct be_mcc_wrb *wrb = &((struct be_mcc_mailbox *)(mbox_mem->va))->wrb; + memset(wrb, 0, sizeof(*wrb)); return wrb; } @@ -896,7 +897,7 @@ static struct be_mcc_wrb *be_cmd_copy(struct be_adapter *adapter, memcpy(dest_wrb, wrb, sizeof(*wrb)); if (wrb->embedded & cpu_to_le32(MCC_WRB_EMBEDDED_MASK)) - fill_wrb_tags(dest_wrb, (ulong) embedded_payload(wrb)); + fill_wrb_tags(dest_wrb, (ulong)embedded_payload(wrb)); return dest_wrb; } @@ -1114,7 +1115,7 @@ int be_cmd_pmac_add(struct be_adapter *adapter, const u8 *mac_addr, err: mutex_unlock(&adapter->mcc_lock); - if (base_status(status) == MCC_STATUS_UNAUTHORIZED_REQUEST) + if (base_status(status) == MCC_STATUS_UNAUTHORIZED_REQUEST) status = -EPERM; return status; @@ -1803,7 +1804,7 @@ int be_cmd_get_fat_dump(struct be_adapter *adapter, u32 buf_len, void *buf) total_size = buf_len; - get_fat_cmd.size = sizeof(struct be_cmd_req_get_fat) + 60*1024; + get_fat_cmd.size = sizeof(struct be_cmd_req_get_fat) + 60 * 1024; get_fat_cmd.va = dma_alloc_coherent(&adapter->pdev->dev, get_fat_cmd.size, &get_fat_cmd.dma, GFP_ATOMIC); @@ -1813,7 +1814,7 @@ int be_cmd_get_fat_dump(struct be_adapter *adapter, u32 buf_len, void *buf) mutex_lock(&adapter->mcc_lock); while (total_size) { - buf_size = min(total_size, (u32)60*1024); + buf_size = min(total_size, (u32)60 * 1024); total_size -= buf_size; wrb = wrb_from_mccq(adapter); @@ -3362,7 +3363,7 @@ int be_cmd_ddr_dma_test(struct be_adapter *adapter, u64 pattern, req->pattern = 
cpu_to_le64(pattern); req->byte_count = cpu_to_le32(byte_cnt); for (i = 0; i < byte_cnt; i++) { - req->snd_buff[i] = (u8)(pattern >> (j*8)); + req->snd_buff[i] = (u8)(pattern >> (j * 8)); j++; if (j > 7) j = 0; @@ -3846,7 +3847,7 @@ int be_cmd_set_mac_list(struct be_adapter *adapter, u8 *mac_array, req->hdr.domain = domain; req->mac_count = mac_count; if (mac_count) - memcpy(req->mac, mac_array, ETH_ALEN*mac_count); + memcpy(req->mac, mac_array, ETH_ALEN * mac_count); status = be_mcc_notify_wait(adapter); -- GitLab From 7d8c48917a9576b5fc8871aa4946149b0e4a4927 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Tue, 7 Mar 2023 12:56:19 +0300 Subject: [PATCH 0232/2078] dt-bindings: net: dsa: mediatek,mt7530: change some descriptions to literal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The line endings must be preserved on gpio-controller, io-supply, and reset-gpios properties to look proper when the YAML file is parsed. Currently it's interpreted as a single line when parsed. Change the style of the description of these properties to literal style to preserve the line endings. Signed-off-by: Arınç ÜNAL Acked-by: Rob Herring Signed-off-by: David S. Miller --- .../devicetree/bindings/net/dsa/mediatek,mt7530.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml b/Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml index 449ee07350122..5ae9cd8f99a24 100644 --- a/Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml +++ b/Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml @@ -93,7 +93,7 @@ properties: gpio-controller: type: boolean - description: + description: | If defined, LED controller of the MT7530 switch will run on GPIO mode. There are 15 controllable pins. @@ -112,7 +112,7 @@ properties: maxItems: 1 io-supply: - description: + description: | Phandle to the regulator node necessary for the I/O power. See Documentation/devicetree/bindings/regulator/mt6323-regulator.txt for details for the regulator setup on these boards. @@ -124,7 +124,7 @@ properties: switch is a part of the multi-chip module. reset-gpios: - description: + description: | GPIO to reset the switch. Use this if mediatek,mcm is not used. This property is optional because some boards share the reset line with other components which makes it impossible to probe the switch if the -- GitLab From fbd3dce9580882580b7d4779569062e371e8be48 Mon Sep 17 00:00:00 2001 From: Steen Hegelund Date: Tue, 7 Mar 2023 14:40:59 +0100 Subject: [PATCH 0233/2078] net: microchip: sparx5: Correct the spelling of the keysets in debugfs Correct the name used in the debugfs output. Signed-off-by: Steen Hegelund Signed-off-by: David S. 
Miller --- drivers/net/ethernet/microchip/sparx5/sparx5_vcap_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_debugfs.c b/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_debugfs.c index 07b472c84a47c..12722f728ef7b 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_debugfs.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_debugfs.c @@ -198,7 +198,7 @@ static void sparx5_vcap_is2_port_keys(struct sparx5 *sparx5, out->prf(out->dst, "ip6_std"); break; case VCAP_IS2_PS_IPV6_MC_IP4_TCP_UDP_OTHER: - out->prf(out->dst, "ip4_tcp_udp ipv4_other"); + out->prf(out->dst, "ip4_tcp_udp ip4_other"); break; } out->prf(out->dst, "\n ipv6_uc: "); -- GitLab From bfcb94aacc92f7025bd86a545037e5eb8db00e32 Mon Sep 17 00:00:00 2001 From: Steen Hegelund Date: Tue, 7 Mar 2023 14:41:00 +0100 Subject: [PATCH 0234/2078] net: microchip: sparx5: Provide rule count, key removal and keyset select This provides these 3 functions in the VCAP API: - Count the number of rules in a VCAP lookup (chain) - Remove a key from a VCAP rule - Find the keyset that gives the smallest rule list from a list of keysets Signed-off-by: Steen Hegelund Signed-off-by: David S. Miller --- .../net/ethernet/microchip/vcap/vcap_api.c | 61 +++++++++++++++++++ .../ethernet/microchip/vcap/vcap_api_client.h | 11 ++++ 2 files changed, 72 insertions(+) diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api.c b/drivers/net/ethernet/microchip/vcap/vcap_api.c index 4847d0d99ec92..5675b0962bc3c 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_api.c +++ b/drivers/net/ethernet/microchip/vcap/vcap_api.c @@ -976,6 +976,25 @@ int vcap_lookup_rule_by_cookie(struct vcap_control *vctrl, u64 cookie) } EXPORT_SYMBOL_GPL(vcap_lookup_rule_by_cookie); +/* Get number of rules in a vcap instance lookup chain id range */ +int vcap_admin_rule_count(struct vcap_admin *admin, int cid) +{ + int max_cid = roundup(cid + 1, VCAP_CID_LOOKUP_SIZE); + int min_cid = rounddown(cid, VCAP_CID_LOOKUP_SIZE); + struct vcap_rule_internal *elem; + int count = 0; + + list_for_each_entry(elem, &admin->rules, list) { + mutex_lock(&admin->lock); + if (elem->data.vcap_chain_id >= min_cid && + elem->data.vcap_chain_id < max_cid) + ++count; + mutex_unlock(&admin->lock); + } + return count; +} +EXPORT_SYMBOL_GPL(vcap_admin_rule_count); + /* Make a copy of the rule, shallow or full */ static struct vcap_rule_internal *vcap_dup_rule(struct vcap_rule_internal *ri, bool full) @@ -3403,6 +3422,25 @@ int vcap_rule_mod_key_u32(struct vcap_rule *rule, enum vcap_key_field key, } EXPORT_SYMBOL_GPL(vcap_rule_mod_key_u32); +/* Remove a key field with value and mask in the rule */ +int vcap_rule_rem_key(struct vcap_rule *rule, enum vcap_key_field key) +{ + struct vcap_rule_internal *ri = to_intrule(rule); + struct vcap_client_keyfield *field; + + field = vcap_find_keyfield(rule, key); + if (!field) { + pr_err("%s:%d: key %s is not in the rule\n", + __func__, __LINE__, vcap_keyfield_name(ri->vctrl, key)); + return -EINVAL; + } + /* Deallocate the key field */ + list_del(&field->ctrl.list); + kfree(field); + return 0; +} +EXPORT_SYMBOL_GPL(vcap_rule_rem_key); + static int vcap_rule_mod_action(struct vcap_rule *rule, enum vcap_action_field action, enum vcap_field_type ftype, @@ -3475,6 +3513,29 @@ int vcap_filter_rule_keys(struct vcap_rule *rule, } EXPORT_SYMBOL_GPL(vcap_filter_rule_keys); +/* Select the keyset from the list that results in the smallest rule size */ +enum vcap_keyfield_set 
+vcap_select_min_rule_keyset(struct vcap_control *vctrl, + enum vcap_type vtype, + struct vcap_keyset_list *kslist) +{ + enum vcap_keyfield_set ret = VCAP_KFS_NO_VALUE; + const struct vcap_set *kset; + int max = 100, idx; + + for (idx = 0; idx < kslist->cnt; ++idx) { + kset = vcap_keyfieldset(vctrl, vtype, kslist->keysets[idx]); + if (!kset) + continue; + if (kset->sw_per_item >= max) + continue; + max = kset->sw_per_item; + ret = kslist->keysets[idx]; + } + return ret; +} +EXPORT_SYMBOL_GPL(vcap_select_min_rule_keyset); + /* Make a full copy of an existing rule with a new rule id */ struct vcap_rule *vcap_copy_rule(struct vcap_rule *erule) { diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_client.h b/drivers/net/ethernet/microchip/vcap/vcap_api_client.h index 417af9754bccb..d9d1f7c9d762c 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_api_client.h +++ b/drivers/net/ethernet/microchip/vcap/vcap_api_client.h @@ -201,6 +201,9 @@ int vcap_rule_add_action_bit(struct vcap_rule *rule, int vcap_rule_add_action_u32(struct vcap_rule *rule, enum vcap_action_field action, u32 value); +/* Get number of rules in a vcap instance lookup chain id range */ +int vcap_admin_rule_count(struct vcap_admin *admin, int cid); + /* VCAP rule counter operations */ int vcap_get_rule_count_by_cookie(struct vcap_control *vctrl, struct vcap_counter *ctr, u64 cookie); @@ -269,6 +272,14 @@ int vcap_rule_mod_action_u32(struct vcap_rule *rule, int vcap_rule_get_key_u32(struct vcap_rule *rule, enum vcap_key_field key, u32 *value, u32 *mask); +/* Remove a key field with value and mask in the rule */ +int vcap_rule_rem_key(struct vcap_rule *rule, enum vcap_key_field key); + +/* Select the keyset from the list that results in the smallest rule size */ +enum vcap_keyfield_set +vcap_select_min_rule_keyset(struct vcap_control *vctrl, enum vcap_type vtype, + struct vcap_keyset_list *kslist); + struct vcap_client_actionfield * vcap_find_actionfield(struct vcap_rule *rule, enum vcap_action_field act); #endif /* __VCAP_API_CLIENT__ */ -- GitLab From 1c14432dcefaf44c2e3fa11ac61097ee342eecfe Mon Sep 17 00:00:00 2001 From: Steen Hegelund Date: Tue, 7 Mar 2023 14:41:01 +0100 Subject: [PATCH 0235/2078] net: microchip: sparx5: Add TC template list to a port This adds a list that is used to collect the templates that are active on a port. This allows the template creation to change the port configuration and the template destruction to change it back. Signed-off-by: Steen Hegelund Signed-off-by: David S. 
Miller --- drivers/net/ethernet/microchip/sparx5/sparx5_main.c | 1 + drivers/net/ethernet/microchip/sparx5/sparx5_main.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index 42b77ba9b5721..a7edf524eedbc 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -282,6 +282,7 @@ static int sparx5_create_port(struct sparx5 *sparx5, spx5_port->phylink_pcs.poll = true; spx5_port->phylink_pcs.ops = &sparx5_phylink_pcs_ops; spx5_port->is_mrouter = false; + INIT_LIST_HEAD(&spx5_port->tc_templates); sparx5->ports[config->portno] = spx5_port; err = sparx5_port_init(sparx5, spx5_port, &config->conf); diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h index 72e7928912eb1..62c85463b634e 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h @@ -192,6 +192,7 @@ struct sparx5_port { u16 ts_id; struct sk_buff_head tx_skbs; bool is_mrouter; + struct list_head tc_templates; /* list of TC templates on this port */ }; enum sparx5_core_clockfreq { -- GitLab From d9f175b0dffbf70add634319a81d683be782c2e1 Mon Sep 17 00:00:00 2001 From: Steen Hegelund Date: Tue, 7 Mar 2023 14:41:02 +0100 Subject: [PATCH 0236/2078] net: microchip: sparx5: Add port keyset changing functionality With this its is now possible for clients (like TC) to change the port keyset configuration in the Sparx5 VCAPs. This is typically done per traffic class which is guided with the L3 protocol information. Before the change the current keyset configuration is collected in a list that is handed back to the client. Signed-off-by: Steen Hegelund Signed-off-by: David S. 
Miller --- .../microchip/sparx5/sparx5_vcap_impl.c | 270 ++++++++++++++++++ .../microchip/sparx5/sparx5_vcap_impl.h | 6 + 2 files changed, 276 insertions(+) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_impl.c b/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_impl.c index d0d4e0385ac72..187efa1fc904f 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_impl.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_impl.c @@ -1519,6 +1519,276 @@ static struct vcap_operations sparx5_vcap_ops = { .port_info = sparx5_port_info, }; +static u32 sparx5_vcap_is0_keyset_to_etype_ps(enum vcap_keyfield_set keyset) +{ + switch (keyset) { + case VCAP_KFS_NORMAL_7TUPLE: + return VCAP_IS0_PS_ETYPE_NORMAL_7TUPLE; + case VCAP_KFS_NORMAL_5TUPLE_IP4: + return VCAP_IS0_PS_ETYPE_NORMAL_5TUPLE_IP4; + default: + return VCAP_IS0_PS_ETYPE_NORMAL_7TUPLE; + } +} + +static void sparx5_vcap_is0_set_port_keyset(struct net_device *ndev, int lookup, + enum vcap_keyfield_set keyset, + int l3_proto) +{ + struct sparx5_port *port = netdev_priv(ndev); + struct sparx5 *sparx5 = port->sparx5; + int portno = port->portno; + u32 value; + + switch (l3_proto) { + case ETH_P_IP: + value = sparx5_vcap_is0_keyset_to_etype_ps(keyset); + spx5_rmw(ANA_CL_ADV_CL_CFG_IP4_CLM_KEY_SEL_SET(value), + ANA_CL_ADV_CL_CFG_IP4_CLM_KEY_SEL, + sparx5, + ANA_CL_ADV_CL_CFG(portno, lookup)); + break; + case ETH_P_IPV6: + value = sparx5_vcap_is0_keyset_to_etype_ps(keyset); + spx5_rmw(ANA_CL_ADV_CL_CFG_IP6_CLM_KEY_SEL_SET(value), + ANA_CL_ADV_CL_CFG_IP6_CLM_KEY_SEL, + sparx5, + ANA_CL_ADV_CL_CFG(portno, lookup)); + break; + default: + value = sparx5_vcap_is0_keyset_to_etype_ps(keyset); + spx5_rmw(ANA_CL_ADV_CL_CFG_ETYPE_CLM_KEY_SEL_SET(value), + ANA_CL_ADV_CL_CFG_ETYPE_CLM_KEY_SEL, + sparx5, + ANA_CL_ADV_CL_CFG(portno, lookup)); + break; + } +} + +static u32 sparx5_vcap_is2_keyset_to_arp_ps(enum vcap_keyfield_set keyset) +{ + switch (keyset) { + case VCAP_KFS_ARP: + return VCAP_IS2_PS_ARP_ARP; + default: + return VCAP_IS2_PS_ARP_MAC_ETYPE; + } +} + +static u32 sparx5_vcap_is2_keyset_to_ipv4_ps(enum vcap_keyfield_set keyset) +{ + switch (keyset) { + case VCAP_KFS_MAC_ETYPE: + return VCAP_IS2_PS_IPV4_UC_MAC_ETYPE; + case VCAP_KFS_IP4_OTHER: + case VCAP_KFS_IP4_TCP_UDP: + return VCAP_IS2_PS_IPV4_UC_IP4_TCP_UDP_OTHER; + case VCAP_KFS_IP_7TUPLE: + return VCAP_IS2_PS_IPV4_UC_IP_7TUPLE; + default: + return VCAP_KFS_NO_VALUE; + } +} + +static u32 sparx5_vcap_is2_keyset_to_ipv6_uc_ps(enum vcap_keyfield_set keyset) +{ + switch (keyset) { + case VCAP_KFS_MAC_ETYPE: + return VCAP_IS2_PS_IPV6_UC_MAC_ETYPE; + case VCAP_KFS_IP4_OTHER: + case VCAP_KFS_IP4_TCP_UDP: + return VCAP_IS2_PS_IPV6_UC_IP4_TCP_UDP_OTHER; + case VCAP_KFS_IP_7TUPLE: + return VCAP_IS2_PS_IPV6_UC_IP_7TUPLE; + default: + return VCAP_KFS_NO_VALUE; + } +} + +static u32 sparx5_vcap_is2_keyset_to_ipv6_mc_ps(enum vcap_keyfield_set keyset) +{ + switch (keyset) { + case VCAP_KFS_MAC_ETYPE: + return VCAP_IS2_PS_IPV6_MC_MAC_ETYPE; + case VCAP_KFS_IP4_OTHER: + case VCAP_KFS_IP4_TCP_UDP: + return VCAP_IS2_PS_IPV6_MC_IP4_TCP_UDP_OTHER; + case VCAP_KFS_IP_7TUPLE: + return VCAP_IS2_PS_IPV6_MC_IP_7TUPLE; + default: + return VCAP_KFS_NO_VALUE; + } +} + +static void sparx5_vcap_is2_set_port_keyset(struct net_device *ndev, int lookup, + enum vcap_keyfield_set keyset, + int l3_proto) +{ + struct sparx5_port *port = netdev_priv(ndev); + struct sparx5 *sparx5 = port->sparx5; + int portno = port->portno; + u32 value; + + switch (l3_proto) { + case ETH_P_ARP: + value = 
sparx5_vcap_is2_keyset_to_arp_ps(keyset); + spx5_rmw(ANA_ACL_VCAP_S2_KEY_SEL_ARP_KEY_SEL_SET(value), + ANA_ACL_VCAP_S2_KEY_SEL_ARP_KEY_SEL, + sparx5, + ANA_ACL_VCAP_S2_KEY_SEL(portno, lookup)); + break; + case ETH_P_IP: + value = sparx5_vcap_is2_keyset_to_ipv4_ps(keyset); + spx5_rmw(ANA_ACL_VCAP_S2_KEY_SEL_IP4_UC_KEY_SEL_SET(value), + ANA_ACL_VCAP_S2_KEY_SEL_IP4_UC_KEY_SEL, + sparx5, + ANA_ACL_VCAP_S2_KEY_SEL(portno, lookup)); + spx5_rmw(ANA_ACL_VCAP_S2_KEY_SEL_IP4_MC_KEY_SEL_SET(value), + ANA_ACL_VCAP_S2_KEY_SEL_IP4_MC_KEY_SEL, + sparx5, + ANA_ACL_VCAP_S2_KEY_SEL(portno, lookup)); + break; + case ETH_P_IPV6: + value = sparx5_vcap_is2_keyset_to_ipv6_uc_ps(keyset); + spx5_rmw(ANA_ACL_VCAP_S2_KEY_SEL_IP6_UC_KEY_SEL_SET(value), + ANA_ACL_VCAP_S2_KEY_SEL_IP6_UC_KEY_SEL, + sparx5, + ANA_ACL_VCAP_S2_KEY_SEL(portno, lookup)); + value = sparx5_vcap_is2_keyset_to_ipv6_mc_ps(keyset); + spx5_rmw(ANA_ACL_VCAP_S2_KEY_SEL_IP6_MC_KEY_SEL_SET(value), + ANA_ACL_VCAP_S2_KEY_SEL_IP6_MC_KEY_SEL, + sparx5, + ANA_ACL_VCAP_S2_KEY_SEL(portno, lookup)); + break; + default: + value = VCAP_IS2_PS_NONETH_MAC_ETYPE; + spx5_rmw(ANA_ACL_VCAP_S2_KEY_SEL_NON_ETH_KEY_SEL_SET(value), + ANA_ACL_VCAP_S2_KEY_SEL_NON_ETH_KEY_SEL, + sparx5, + ANA_ACL_VCAP_S2_KEY_SEL(portno, lookup)); + break; + } +} + +static u32 sparx5_vcap_es2_keyset_to_arp_ps(enum vcap_keyfield_set keyset) +{ + switch (keyset) { + case VCAP_KFS_ARP: + return VCAP_ES2_PS_ARP_ARP; + default: + return VCAP_ES2_PS_ARP_MAC_ETYPE; + } +} + +static u32 sparx5_vcap_es2_keyset_to_ipv4_ps(enum vcap_keyfield_set keyset) +{ + switch (keyset) { + case VCAP_KFS_MAC_ETYPE: + return VCAP_ES2_PS_IPV4_MAC_ETYPE; + case VCAP_KFS_IP_7TUPLE: + return VCAP_ES2_PS_IPV4_IP_7TUPLE; + case VCAP_KFS_IP4_TCP_UDP: + return VCAP_ES2_PS_IPV4_IP4_TCP_UDP_OTHER; + case VCAP_KFS_IP4_OTHER: + return VCAP_ES2_PS_IPV4_IP4_OTHER; + default: + return VCAP_ES2_PS_IPV4_MAC_ETYPE; + } +} + +static u32 sparx5_vcap_es2_keyset_to_ipv6_ps(enum vcap_keyfield_set keyset) +{ + switch (keyset) { + case VCAP_KFS_MAC_ETYPE: + return VCAP_ES2_PS_IPV6_MAC_ETYPE; + case VCAP_KFS_IP4_TCP_UDP: + case VCAP_KFS_IP4_OTHER: + return VCAP_ES2_PS_IPV6_IP4_DOWNGRADE; + case VCAP_KFS_IP_7TUPLE: + return VCAP_ES2_PS_IPV6_IP_7TUPLE; + case VCAP_KFS_IP6_STD: + return VCAP_ES2_PS_IPV6_IP6_STD; + default: + return VCAP_ES2_PS_IPV6_MAC_ETYPE; + } +} + +static void sparx5_vcap_es2_set_port_keyset(struct net_device *ndev, int lookup, + enum vcap_keyfield_set keyset, + int l3_proto) +{ + struct sparx5_port *port = netdev_priv(ndev); + struct sparx5 *sparx5 = port->sparx5; + int portno = port->portno; + u32 value; + + switch (l3_proto) { + case ETH_P_IP: + value = sparx5_vcap_es2_keyset_to_ipv4_ps(keyset); + spx5_rmw(EACL_VCAP_ES2_KEY_SEL_IP4_KEY_SEL_SET(value), + EACL_VCAP_ES2_KEY_SEL_IP4_KEY_SEL, + sparx5, + EACL_VCAP_ES2_KEY_SEL(portno, lookup)); + break; + case ETH_P_IPV6: + value = sparx5_vcap_es2_keyset_to_ipv6_ps(keyset); + spx5_rmw(EACL_VCAP_ES2_KEY_SEL_IP6_KEY_SEL_SET(value), + EACL_VCAP_ES2_KEY_SEL_IP6_KEY_SEL, + sparx5, + EACL_VCAP_ES2_KEY_SEL(portno, lookup)); + break; + case ETH_P_ARP: + value = sparx5_vcap_es2_keyset_to_arp_ps(keyset); + spx5_rmw(EACL_VCAP_ES2_KEY_SEL_ARP_KEY_SEL_SET(value), + EACL_VCAP_ES2_KEY_SEL_ARP_KEY_SEL, + sparx5, + EACL_VCAP_ES2_KEY_SEL(portno, lookup)); + break; + } +} + +/* Change the port keyset for the lookup and protocol */ +void sparx5_vcap_set_port_keyset(struct net_device *ndev, + struct vcap_admin *admin, + int cid, + u16 l3_proto, + enum vcap_keyfield_set keyset, + struct 
vcap_keyset_list *orig) +{ + struct sparx5_port *port; + int lookup; + + switch (admin->vtype) { + case VCAP_TYPE_IS0: + lookup = sparx5_vcap_is0_cid_to_lookup(cid); + if (orig) + sparx5_vcap_is0_get_port_keysets(ndev, lookup, orig, + l3_proto); + sparx5_vcap_is0_set_port_keyset(ndev, lookup, keyset, l3_proto); + break; + case VCAP_TYPE_IS2: + lookup = sparx5_vcap_is2_cid_to_lookup(cid); + if (orig) + sparx5_vcap_is2_get_port_keysets(ndev, lookup, orig, + l3_proto); + sparx5_vcap_is2_set_port_keyset(ndev, lookup, keyset, l3_proto); + break; + case VCAP_TYPE_ES0: + break; + case VCAP_TYPE_ES2: + lookup = sparx5_vcap_es2_cid_to_lookup(cid); + if (orig) + sparx5_vcap_es2_get_port_keysets(ndev, lookup, orig, + l3_proto); + sparx5_vcap_es2_set_port_keyset(ndev, lookup, keyset, l3_proto); + break; + default: + port = netdev_priv(ndev); + sparx5_vcap_type_err(port->sparx5, admin, __func__); + break; + } +} + /* Enable IS0 lookups per port and set the keyset generation */ static void sparx5_vcap_is0_port_key_selection(struct sparx5 *sparx5, struct vcap_admin *admin) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_impl.h b/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_impl.h index 3260ab5e3a829..2684d9199b05a 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_impl.h +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_impl.h @@ -195,6 +195,12 @@ int sparx5_vcap_get_port_keyset(struct net_device *ndev, u16 l3_proto, struct vcap_keyset_list *kslist); +/* Change the port keyset for the lookup and protocol */ +void sparx5_vcap_set_port_keyset(struct net_device *ndev, + struct vcap_admin *admin, int cid, + u16 l3_proto, enum vcap_keyfield_set keyset, + struct vcap_keyset_list *orig); + /* Check if the ethertype is supported by the vcap port classification */ bool sparx5_vcap_is_known_etype(struct vcap_admin *admin, u16 etype); -- GitLab From e1d597ecbe5c5d7128630d4c9ec27c42018613ed Mon Sep 17 00:00:00 2001 From: Steen Hegelund Date: Tue, 7 Mar 2023 14:41:03 +0100 Subject: [PATCH 0237/2078] net: microchip: sparx5: Add TC template support This adds support for using the "template add" and "template destroy" functionality to change the port keyset configuration. If the VCAP lookup already contains rules, the port keyset is left unchanged, as a change would make these rules unusable. When the template is destroyed the port keyset configuration is restored. The filters using the template chain will automatically be deleted by the TC framework. Signed-off-by: Steen Hegelund Signed-off-by: David S. 
Miller --- .../microchip/sparx5/sparx5_tc_flower.c | 209 +++++++++++++++++- 1 file changed, 202 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c b/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c index b36819aafacac..3f87a5285a6de 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c @@ -28,6 +28,14 @@ struct sparx5_multiple_rules { struct sparx5_wildcard_rule rule[SPX5_MAX_RULE_SIZE]; }; +struct sparx5_tc_flower_template { + struct list_head list; /* for insertion in the list of templates */ + int cid; /* chain id */ + enum vcap_keyfield_set orig; /* keyset used before the template */ + enum vcap_keyfield_set keyset; /* new keyset used by template */ + u16 l3_proto; /* protocol specified in the template */ +}; + static int sparx5_tc_flower_es0_tpid(struct vcap_tc_flower_parse_usage *st) { @@ -382,7 +390,7 @@ static int sparx5_tc_select_protocol_keyset(struct net_device *ndev, /* Find the keysets that the rule can use */ matches.keysets = keysets; matches.max = ARRAY_SIZE(keysets); - if (vcap_rule_find_keysets(vrule, &matches) == 0) + if (!vcap_rule_find_keysets(vrule, &matches)) return -EINVAL; /* Find the keysets that the port configuration supports */ @@ -996,6 +1004,73 @@ static int sparx5_tc_action_vlan_push(struct vcap_admin *admin, return err; } +/* Remove rule keys that may prevent templates from matching a keyset */ +static void sparx5_tc_flower_simplify_rule(struct vcap_admin *admin, + struct vcap_rule *vrule, + u16 l3_proto) +{ + switch (admin->vtype) { + case VCAP_TYPE_IS0: + vcap_rule_rem_key(vrule, VCAP_KF_ETYPE); + switch (l3_proto) { + case ETH_P_IP: + break; + case ETH_P_IPV6: + vcap_rule_rem_key(vrule, VCAP_KF_IP_SNAP_IS); + break; + default: + break; + } + break; + case VCAP_TYPE_ES2: + switch (l3_proto) { + case ETH_P_IP: + if (vrule->keyset == VCAP_KFS_IP4_OTHER) + vcap_rule_rem_key(vrule, VCAP_KF_TCP_IS); + break; + case ETH_P_IPV6: + if (vrule->keyset == VCAP_KFS_IP6_STD) + vcap_rule_rem_key(vrule, VCAP_KF_TCP_IS); + vcap_rule_rem_key(vrule, VCAP_KF_IP4_IS); + break; + default: + break; + } + break; + case VCAP_TYPE_IS2: + switch (l3_proto) { + case ETH_P_IP: + case ETH_P_IPV6: + vcap_rule_rem_key(vrule, VCAP_KF_IP4_IS); + break; + default: + break; + } + break; + default: + break; + } +} + +static bool sparx5_tc_flower_use_template(struct net_device *ndev, + struct flow_cls_offload *fco, + struct vcap_admin *admin, + struct vcap_rule *vrule) +{ + struct sparx5_port *port = netdev_priv(ndev); + struct sparx5_tc_flower_template *ftp; + + list_for_each_entry(ftp, &port->tc_templates, list) { + if (ftp->cid != fco->common.chain_index) + continue; + + vcap_set_rule_set_keyset(vrule, ftp->keyset); + sparx5_tc_flower_simplify_rule(admin, vrule, ftp->l3_proto); + return true; + } + return false; +} + static int sparx5_tc_flower_replace(struct net_device *ndev, struct flow_cls_offload *fco, struct vcap_admin *admin, @@ -1122,12 +1197,14 @@ static int sparx5_tc_flower_replace(struct net_device *ndev, goto out; } - err = sparx5_tc_select_protocol_keyset(ndev, vrule, admin, - state.l3_proto, &multi); - if (err) { - NL_SET_ERR_MSG_MOD(fco->common.extack, - "No matching port keyset for filter protocol and keys"); - goto out; + if (!sparx5_tc_flower_use_template(ndev, fco, admin, vrule)) { + err = sparx5_tc_select_protocol_keyset(ndev, vrule, admin, + state.l3_proto, &multi); + if (err) { + NL_SET_ERR_MSG_MOD(fco->common.extack, + "No matching 
port keyset for filter protocol and keys"); + goto out; + } } /* provide the l3 protocol to guide the keyset selection */ @@ -1259,6 +1336,120 @@ static int sparx5_tc_flower_stats(struct net_device *ndev, return err; } +static int sparx5_tc_flower_template_create(struct net_device *ndev, + struct flow_cls_offload *fco, + struct vcap_admin *admin) +{ + struct sparx5_port *port = netdev_priv(ndev); + struct vcap_tc_flower_parse_usage state = { + .fco = fco, + .l3_proto = ETH_P_ALL, + .admin = admin, + }; + struct sparx5_tc_flower_template *ftp; + struct vcap_keyset_list kslist = {}; + enum vcap_keyfield_set keysets[10]; + struct vcap_control *vctrl; + struct vcap_rule *vrule; + int count, err; + + if (admin->vtype == VCAP_TYPE_ES0) { + pr_err("%s:%d: %s\n", __func__, __LINE__, + "VCAP does not support templates"); + return -EINVAL; + } + + count = vcap_admin_rule_count(admin, fco->common.chain_index); + if (count > 0) { + pr_err("%s:%d: %s\n", __func__, __LINE__, + "Filters are already present"); + return -EBUSY; + } + + ftp = kzalloc(sizeof(*ftp), GFP_KERNEL); + if (!ftp) + return -ENOMEM; + + ftp->cid = fco->common.chain_index; + ftp->orig = VCAP_KFS_NO_VALUE; + ftp->keyset = VCAP_KFS_NO_VALUE; + + vctrl = port->sparx5->vcap_ctrl; + vrule = vcap_alloc_rule(vctrl, ndev, fco->common.chain_index, + VCAP_USER_TC, fco->common.prio, 0); + if (IS_ERR(vrule)) { + err = PTR_ERR(vrule); + goto err_rule; + } + + state.vrule = vrule; + state.frule = flow_cls_offload_flow_rule(fco); + err = sparx5_tc_use_dissectors(&state, admin, vrule); + if (err) { + pr_err("%s:%d: key error: %d\n", __func__, __LINE__, err); + goto out; + } + + ftp->l3_proto = state.l3_proto; + + sparx5_tc_flower_simplify_rule(admin, vrule, state.l3_proto); + + /* Find the keysets that the rule can use */ + kslist.keysets = keysets; + kslist.max = ARRAY_SIZE(keysets); + if (!vcap_rule_find_keysets(vrule, &kslist)) { + pr_err("%s:%d: %s\n", __func__, __LINE__, + "Could not find a suitable keyset"); + err = -ENOENT; + goto out; + } + + ftp->keyset = vcap_select_min_rule_keyset(vctrl, admin->vtype, &kslist); + kslist.cnt = 0; + sparx5_vcap_set_port_keyset(ndev, admin, fco->common.chain_index, + state.l3_proto, + ftp->keyset, + &kslist); + + if (kslist.cnt > 0) + ftp->orig = kslist.keysets[0]; + + /* Store new template */ + list_add_tail(&ftp->list, &port->tc_templates); + vcap_free_rule(vrule); + return 0; + +out: + vcap_free_rule(vrule); +err_rule: + kfree(ftp); + return err; +} + +static int sparx5_tc_flower_template_destroy(struct net_device *ndev, + struct flow_cls_offload *fco, + struct vcap_admin *admin) +{ + struct sparx5_port *port = netdev_priv(ndev); + struct sparx5_tc_flower_template *ftp, *tmp; + int err = -ENOENT; + + /* Rules using the template are removed by the tc framework */ + list_for_each_entry_safe(ftp, tmp, &port->tc_templates, list) { + if (ftp->cid != fco->common.chain_index) + continue; + + sparx5_vcap_set_port_keyset(ndev, admin, + fco->common.chain_index, + ftp->l3_proto, ftp->orig, + NULL); + list_del(&ftp->list); + kfree(ftp); + break; + } + return err; +} + int sparx5_tc_flower(struct net_device *ndev, struct flow_cls_offload *fco, bool ingress) { @@ -1282,6 +1473,10 @@ int sparx5_tc_flower(struct net_device *ndev, struct flow_cls_offload *fco, return sparx5_tc_flower_destroy(ndev, fco, admin); case FLOW_CLS_STATS: return sparx5_tc_flower_stats(ndev, fco, admin); + case FLOW_CLS_TMPLT_CREATE: + return sparx5_tc_flower_template_create(ndev, fco, admin); + case FLOW_CLS_TMPLT_DESTROY: + return 
sparx5_tc_flower_template_destroy(ndev, fco, admin); default: return -EOPNOTSUPP; } -- GitLab From 4386b921857793440ebd4db3d6b70639149c7074 Mon Sep 17 00:00:00 2001 From: Sriram Yagnaraman Date: Fri, 24 Feb 2023 10:52:51 +0100 Subject: [PATCH 0238/2078] netfilter: bridge: introduce broute meta statement nftables equivalent for ebtables -t broute. Implement broute meta statement to set br_netfilter_broute flag in skb to force a packet to be routed instead of being bridged. Signed-off-by: Sriram Yagnaraman Signed-off-by: Florian Westphal --- include/uapi/linux/netfilter/nf_tables.h | 2 + net/bridge/netfilter/nft_meta_bridge.c | 71 +++++++++++++++++++++++- 2 files changed, 70 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index ff677f3a6cadb..9c6f02c26054a 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -931,6 +931,7 @@ enum nft_exthdr_attributes { * @NFT_META_TIME_HOUR: hour of day (in seconds) * @NFT_META_SDIF: slave device interface index * @NFT_META_SDIFNAME: slave device interface name + * @NFT_META_BRI_BROUTE: packet br_netfilter_broute bit */ enum nft_meta_keys { NFT_META_LEN, @@ -969,6 +970,7 @@ enum nft_meta_keys { NFT_META_TIME_HOUR, NFT_META_SDIF, NFT_META_SDIFNAME, + NFT_META_BRI_BROUTE, __NFT_META_IIFTYPE, }; diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c index c3ecd77e25cb8..bd4d1b4d745f6 100644 --- a/net/bridge/netfilter/nft_meta_bridge.c +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -8,6 +8,9 @@ #include #include #include +#include /* NF_BR_PRE_ROUTING */ + +#include "../br_private.h" static const struct net_device * nft_meta_get_bridge(const struct net_device *dev) @@ -102,6 +105,50 @@ static const struct nft_expr_ops nft_meta_bridge_get_ops = { .reduce = nft_meta_get_reduce, }; +static void nft_meta_bridge_set_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_meta *meta = nft_expr_priv(expr); + u32 *sreg = ®s->data[meta->sreg]; + struct sk_buff *skb = pkt->skb; + u8 value8; + + switch (meta->key) { + case NFT_META_BRI_BROUTE: + value8 = nft_reg_load8(sreg); + BR_INPUT_SKB_CB(skb)->br_netfilter_broute = !!value8; + break; + default: + nft_meta_set_eval(expr, regs, pkt); + } +} + +static int nft_meta_bridge_set_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_meta *priv = nft_expr_priv(expr); + unsigned int len; + int err; + + priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); + switch (priv->key) { + case NFT_META_BRI_BROUTE: + len = sizeof(u8); + break; + default: + return nft_meta_set_init(ctx, expr, tb); + } + + priv->len = len; + err = nft_parse_register_load(tb[NFTA_META_SREG], &priv->sreg, len); + if (err < 0) + return err; + + return 0; +} + static bool nft_meta_bridge_set_reduce(struct nft_regs_track *track, const struct nft_expr *expr) { @@ -120,15 +167,33 @@ static bool nft_meta_bridge_set_reduce(struct nft_regs_track *track, return false; } +static int nft_meta_bridge_set_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + struct nft_meta *priv = nft_expr_priv(expr); + unsigned int hooks; + + switch (priv->key) { + case NFT_META_BRI_BROUTE: + hooks = 1 << NF_BR_PRE_ROUTING; + break; + default: + return nft_meta_set_validate(ctx, expr, data); + } + + return nft_chain_validate_hooks(ctx->chain, hooks); +} + 
static const struct nft_expr_ops nft_meta_bridge_set_ops = { .type = &nft_meta_bridge_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), - .eval = nft_meta_set_eval, - .init = nft_meta_set_init, + .eval = nft_meta_bridge_set_eval, + .init = nft_meta_bridge_set_init, .destroy = nft_meta_set_destroy, .dump = nft_meta_set_dump, .reduce = nft_meta_bridge_set_reduce, - .validate = nft_meta_set_validate, + .validate = nft_meta_bridge_set_validate, }; static const struct nft_expr_ops * -- GitLab From 10369080454d87ee5b2db211ce947cb3118f0e13 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 7 Mar 2023 14:59:59 +0000 Subject: [PATCH 0239/2078] net: reclaim skb->scm_io_uring bit Commit 0091bfc81741 ("io_uring/af_unix: defer registered files gc to io_uring release") added one bit to struct sk_buff. This structure is critical for networking, and we try very hard to not add bloat on it, unless absolutely required. For instance, we can use a specific destructor as a wrapper around unix_destruct_scm(), to identify skbs that unix_gc() has to special case. Signed-off-by: Eric Dumazet Cc: Pavel Begunkov Cc: Thadeu Lima de Souza Cascardo Cc: Jens Axboe Reviewed-by: Jens Axboe Reviewed-by: Pavel Begunkov Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 -- include/net/af_unix.h | 1 + io_uring/rsrc.c | 3 +-- net/unix/garbage.c | 2 +- net/unix/scm.c | 6 ++++++ 5 files changed, 9 insertions(+), 5 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ff7ad331fb825..fe661011644b8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -810,7 +810,6 @@ typedef unsigned char *sk_buff_data_t; * @csum_level: indicates the number of consecutive checksums found in * the packet minus one that have been verified as * CHECKSUM_UNNECESSARY (max 3) - * @scm_io_uring: SKB holds io_uring registered files * @dst_pending_confirm: need to confirm neighbour * @decrypted: Decrypted SKB * @slow_gro: state present at GRO time, slower prepare step required @@ -989,7 +988,6 @@ struct sk_buff { #endif __u8 slow_gro:1; __u8 csum_not_inet:1; - __u8 scm_io_uring:1; #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 480fa579787e5..45ebde587138e 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -11,6 +11,7 @@ void unix_inflight(struct user_struct *user, struct file *fp); void unix_notinflight(struct user_struct *user, struct file *fp); void unix_destruct_scm(struct sk_buff *skb); +void io_uring_destruct_scm(struct sk_buff *skb); void unix_gc(void); void wait_for_unix_gc(void); struct sock *unix_get_socket(struct file *filp); diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index a59fc02de5983..27ceda3b50cf4 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -867,8 +867,7 @@ int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file) UNIXCB(skb).fp = fpl; skb->sk = sk; - skb->scm_io_uring = 1; - skb->destructor = unix_destruct_scm; + skb->destructor = io_uring_destruct_scm; refcount_add(skb->truesize, &sk->sk_wmem_alloc); } diff --git a/net/unix/garbage.c b/net/unix/garbage.c index dc27635403932..2405f0f9af31c 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -305,7 +305,7 @@ void unix_gc(void) * release.path eventually putting registered files. 
*/ skb_queue_walk_safe(&hitlist, skb, next_skb) { - if (skb->scm_io_uring) { + if (skb->destructor == io_uring_destruct_scm) { __skb_unlink(skb, &hitlist); skb_queue_tail(&skb->sk->sk_receive_queue, skb); } diff --git a/net/unix/scm.c b/net/unix/scm.c index aa27a02478dc1..f9152881d77f6 100644 --- a/net/unix/scm.c +++ b/net/unix/scm.c @@ -152,3 +152,9 @@ void unix_destruct_scm(struct sk_buff *skb) sock_wfree(skb); } EXPORT_SYMBOL(unix_destruct_scm); + +void io_uring_destruct_scm(struct sk_buff *skb) +{ + unix_destruct_scm(skb); +} +EXPORT_SYMBOL(io_uring_destruct_scm); -- GitLab From 9ccff83b1322f95da7a74784cf6f47a481e03dc5 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 7 Mar 2023 16:31:27 -0500 Subject: [PATCH 0240/2078] netfilter: bridge: call pskb_may_pull in br_nf_check_hbh_len When checking Hop-by-hop option header, if the option data is in nonlinear area, it should do pskb_may_pull instead of discarding the skb as a bad IPv6 packet. Signed-off-by: Xin Long Reviewed-by: Simon Horman Acked-by: Nikolay Aleksandrov Reviewed-by: Aaron Conole Signed-off-by: Florian Westphal --- net/bridge/br_netfilter_ipv6.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 6b07f30675bb0..afd1c718b683a 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -45,14 +45,18 @@ */ static int br_nf_check_hbh_len(struct sk_buff *skb) { - unsigned char *raw = (u8 *)(ipv6_hdr(skb) + 1); + int len, off = sizeof(struct ipv6hdr); + unsigned char *nh; u32 pkt_len; - const unsigned char *nh = skb_network_header(skb); - int off = raw - nh; - int len = (raw[1] + 1) << 3; - if ((raw + len) - skb->data > skb_headlen(skb)) + if (!pskb_may_pull(skb, off + 8)) goto bad; + nh = (unsigned char *)(ipv6_hdr(skb) + 1); + len = (nh[1] + 1) << 3; + + if (!pskb_may_pull(skb, off + len)) + goto bad; + nh = skb_network_header(skb); off += 2; len -= 2; -- GitLab From a7f1a2f43e683c8ffca691d45f2cb32c052158fa Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 7 Mar 2023 16:31:28 -0500 Subject: [PATCH 0241/2078] netfilter: bridge: check len before accessing more nh data In the while loop of br_nf_check_hbh_len(), similar to ip6_parse_tlv(), before accessing 'nh[off + 1]', it should add a check 'len < 2'; and before parsing IPV6_TLV_JUMBO, it should add a check 'optlen > len', in case of overflows. 
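(Illustration only, not part of the patch: a self-contained sketch of the bounds-checked option walk that these two checks produce. The helper name below is made up for the example; the real code is br_nf_check_hbh_len() in the diff that follows.)

/* Walk the Hop-by-hop option area: "nh" points at the IPv6 header, "off" at
 * the first option byte within the Hop-by-hop header, and "len" is the number
 * of option bytes left. Returns 0 if every option fits, -1 otherwise.
 */
static int example_check_hbh_options(const unsigned char *nh, int off, int len)
{
	while (len > 0) {
		int optlen;

		if (nh[off] == 0) {	/* IPV6_TLV_PAD1: single padding byte */
			off++;
			len--;
			continue;
		}
		if (len < 2)		/* need both the type and the length byte */
			return -1;
		optlen = nh[off + 1] + 2;
		if (optlen > len)	/* the option must fit in what is left */
			return -1;
		/* per-option handling (e.g. the jumbogram option) goes here */
		off += optlen;
		len -= optlen;
	}
	return 0;
}
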
Signed-off-by: Xin Long Reviewed-by: Simon Horman Acked-by: Nikolay Aleksandrov Reviewed-by: Aaron Conole Signed-off-by: Florian Westphal --- net/bridge/br_netfilter_ipv6.c | 45 +++++++++++++++------------------- 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index afd1c718b683a..8be3c5c8b925d 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -50,54 +50,49 @@ static int br_nf_check_hbh_len(struct sk_buff *skb) u32 pkt_len; if (!pskb_may_pull(skb, off + 8)) - goto bad; + return -1; nh = (unsigned char *)(ipv6_hdr(skb) + 1); len = (nh[1] + 1) << 3; if (!pskb_may_pull(skb, off + len)) - goto bad; + return -1; nh = skb_network_header(skb); off += 2; len -= 2; - while (len > 0) { - int optlen = nh[off + 1] + 2; - - switch (nh[off]) { - case IPV6_TLV_PAD1: - optlen = 1; - break; + int optlen; - case IPV6_TLV_PADN: - break; + if (nh[off] == IPV6_TLV_PAD1) { + off++; + len--; + continue; + } + if (len < 2) + return -1; + optlen = nh[off + 1] + 2; + if (optlen > len) + return -1; - case IPV6_TLV_JUMBO: + if (nh[off] == IPV6_TLV_JUMBO) { if (nh[off + 1] != 4 || (off & 3) != 2) - goto bad; + return -1; pkt_len = ntohl(*(__be32 *)(nh + off + 2)); if (pkt_len <= IPV6_MAXPLEN || ipv6_hdr(skb)->payload_len) - goto bad; + return -1; if (pkt_len > skb->len - sizeof(struct ipv6hdr)) - goto bad; + return -1; if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) - goto bad; + return -1; nh = skb_network_header(skb); - break; - default: - if (optlen > len) - goto bad; - break; } off += optlen; len -= optlen; } - if (len == 0) - return 0; -bad: - return -1; + + return len ? -1 : 0; } int br_validate_ipv6(struct net *net, struct sk_buff *skb) -- GitLab From 0b24bd71a6c0214aaa2115302dd6598b89d2fa8a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 7 Mar 2023 16:31:29 -0500 Subject: [PATCH 0242/2078] netfilter: bridge: move pskb_trim_rcsum out of br_nf_check_hbh_len br_nf_check_hbh_len() is a function to check the Hop-by-hop option header, and shouldn't do pskb_trim_rcsum() there. This patch is to pass pkt_len out to br_validate_ipv6() and do pskb_trim_rcsum() after calling br_validate_ipv6() instead. Signed-off-by: Xin Long Reviewed-by: Simon Horman Acked-by: Nikolay Aleksandrov Reviewed-by: Aaron Conole Signed-off-by: Florian Westphal --- net/bridge/br_netfilter_ipv6.c | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 8be3c5c8b925d..a0d6dfb3e2556 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -43,11 +43,10 @@ /* We only check the length. 
A bridge shouldn't do any hop-by-hop stuff * anyway */ -static int br_nf_check_hbh_len(struct sk_buff *skb) +static int br_nf_check_hbh_len(struct sk_buff *skb, u32 *plen) { int len, off = sizeof(struct ipv6hdr); unsigned char *nh; - u32 pkt_len; if (!pskb_may_pull(skb, off + 8)) return -1; @@ -75,6 +74,8 @@ static int br_nf_check_hbh_len(struct sk_buff *skb) return -1; if (nh[off] == IPV6_TLV_JUMBO) { + u32 pkt_len; + if (nh[off + 1] != 4 || (off & 3) != 2) return -1; pkt_len = ntohl(*(__be32 *)(nh + off + 2)); @@ -83,10 +84,7 @@ static int br_nf_check_hbh_len(struct sk_buff *skb) return -1; if (pkt_len > skb->len - sizeof(struct ipv6hdr)) return -1; - if (pskb_trim_rcsum(skb, - pkt_len + sizeof(struct ipv6hdr))) - return -1; - nh = skb_network_header(skb); + *plen = pkt_len; } off += optlen; len -= optlen; @@ -114,22 +112,19 @@ int br_validate_ipv6(struct net *net, struct sk_buff *skb) goto inhdr_error; pkt_len = ntohs(hdr->payload_len); + if (hdr->nexthdr == NEXTHDR_HOP && br_nf_check_hbh_len(skb, &pkt_len)) + goto drop; - if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { - if (pkt_len + ip6h_len > skb->len) { - __IP6_INC_STATS(net, idev, - IPSTATS_MIB_INTRUNCATEDPKTS); - goto drop; - } - if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) { - __IP6_INC_STATS(net, idev, - IPSTATS_MIB_INDISCARDS); - goto drop; - } - hdr = ipv6_hdr(skb); + if (pkt_len + ip6h_len > skb->len) { + __IP6_INC_STATS(net, idev, + IPSTATS_MIB_INTRUNCATEDPKTS); + goto drop; } - if (hdr->nexthdr == NEXTHDR_HOP && br_nf_check_hbh_len(skb)) + if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) { + __IP6_INC_STATS(net, idev, + IPSTATS_MIB_INDISCARDS); goto drop; + } memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); /* No IP options in IPv6 header; however it should be -- GitLab From 28e144cf5f72ce1c304571bc448e37c27495903a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 7 Mar 2023 16:31:30 -0500 Subject: [PATCH 0243/2078] netfilter: move br_nf_check_hbh_len to utils Rename br_nf_check_hbh_len() to nf_ip6_check_hbh_len() and move it to netfilter utils, so that it can be used by other modules, like ovs and tc. Signed-off-by: Xin Long Reviewed-by: Simon Horman Reviewed-by: Nikolay Aleksandrov Reviewed-by: Aaron Conole Signed-off-by: Florian Westphal --- include/linux/netfilter_ipv6.h | 2 ++ net/bridge/br_netfilter_ipv6.c | 55 +--------------------------------- net/netfilter/utils.c | 52 ++++++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 54 deletions(-) diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 48314ade1506f..7834c0be2831d 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -197,6 +197,8 @@ static inline int nf_cookie_v6_check(const struct ipv6hdr *iph, __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol); +int nf_ip6_check_hbh_len(struct sk_buff *skb, u32 *plen); + int ipv6_netfilter_init(void); void ipv6_netfilter_fini(void); diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index a0d6dfb3e2556..550039dfc31a9 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -40,59 +40,6 @@ #include #endif -/* We only check the length. 
A bridge shouldn't do any hop-by-hop stuff - * anyway - */ -static int br_nf_check_hbh_len(struct sk_buff *skb, u32 *plen) -{ - int len, off = sizeof(struct ipv6hdr); - unsigned char *nh; - - if (!pskb_may_pull(skb, off + 8)) - return -1; - nh = (unsigned char *)(ipv6_hdr(skb) + 1); - len = (nh[1] + 1) << 3; - - if (!pskb_may_pull(skb, off + len)) - return -1; - nh = skb_network_header(skb); - - off += 2; - len -= 2; - while (len > 0) { - int optlen; - - if (nh[off] == IPV6_TLV_PAD1) { - off++; - len--; - continue; - } - if (len < 2) - return -1; - optlen = nh[off + 1] + 2; - if (optlen > len) - return -1; - - if (nh[off] == IPV6_TLV_JUMBO) { - u32 pkt_len; - - if (nh[off + 1] != 4 || (off & 3) != 2) - return -1; - pkt_len = ntohl(*(__be32 *)(nh + off + 2)); - if (pkt_len <= IPV6_MAXPLEN || - ipv6_hdr(skb)->payload_len) - return -1; - if (pkt_len > skb->len - sizeof(struct ipv6hdr)) - return -1; - *plen = pkt_len; - } - off += optlen; - len -= optlen; - } - - return len ? -1 : 0; -} - int br_validate_ipv6(struct net *net, struct sk_buff *skb) { const struct ipv6hdr *hdr; @@ -112,7 +59,7 @@ int br_validate_ipv6(struct net *net, struct sk_buff *skb) goto inhdr_error; pkt_len = ntohs(hdr->payload_len); - if (hdr->nexthdr == NEXTHDR_HOP && br_nf_check_hbh_len(skb, &pkt_len)) + if (hdr->nexthdr == NEXTHDR_HOP && nf_ip6_check_hbh_len(skb, &pkt_len)) goto drop; if (pkt_len + ip6h_len > skb->len) { diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c index 2182d361e273f..acef4155f0daf 100644 --- a/net/netfilter/utils.c +++ b/net/netfilter/utils.c @@ -215,3 +215,55 @@ int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry) } return ret; } + +/* Only get and check the lengths, not do any hop-by-hop stuff. */ +int nf_ip6_check_hbh_len(struct sk_buff *skb, u32 *plen) +{ + int len, off = sizeof(struct ipv6hdr); + unsigned char *nh; + + if (!pskb_may_pull(skb, off + 8)) + return -ENOMEM; + nh = (unsigned char *)(ipv6_hdr(skb) + 1); + len = (nh[1] + 1) << 3; + + if (!pskb_may_pull(skb, off + len)) + return -ENOMEM; + nh = skb_network_header(skb); + + off += 2; + len -= 2; + while (len > 0) { + int optlen; + + if (nh[off] == IPV6_TLV_PAD1) { + off++; + len--; + continue; + } + if (len < 2) + return -EBADMSG; + optlen = nh[off + 1] + 2; + if (optlen > len) + return -EBADMSG; + + if (nh[off] == IPV6_TLV_JUMBO) { + u32 pkt_len; + + if (nh[off + 1] != 4 || (off & 3) != 2) + return -EBADMSG; + pkt_len = ntohl(*(__be32 *)(nh + off + 2)); + if (pkt_len <= IPV6_MAXPLEN || + ipv6_hdr(skb)->payload_len) + return -EBADMSG; + if (pkt_len > skb->len - sizeof(struct ipv6hdr)) + return -EBADMSG; + *plen = pkt_len; + } + off += optlen; + len -= optlen; + } + + return len ? -EBADMSG : 0; +} +EXPORT_SYMBOL_GPL(nf_ip6_check_hbh_len); -- GitLab From eaafdaa3e92234b877b645431957549a1f87e2bf Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 7 Mar 2023 16:31:31 -0500 Subject: [PATCH 0244/2078] netfilter: use nf_ip6_check_hbh_len in nf_ct_skb_network_trim For IPv6 Jumbo packets, the ipv6_hdr(skb)->payload_len is always 0, and its real payload_len ( > 65535) is saved in hbh exthdr. With 0 length for the jumbo packets, all data and exthdr will be trimmed in nf_ct_skb_network_trim(). This patch is to call nf_ip6_check_hbh_len() to get real pkt_len of the IPv6 packet, similar to br_validate_ipv6(). 
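To make the length problem concrete, here is a small self-contained sketch (plain C, not kernel code; the helper name and values are ours, only the jumbogram rule, payload_len == 0 with the real length in the Hop-by-Hop option, is taken from the description above):

	#include <stdio.h>

	#define IPV6_HDR_LEN	40

	/* For a jumbogram the fixed header reports payload_len == 0 and the
	 * real length (> 65535) comes from the Jumbo Payload option.
	 */
	static unsigned int ipv6_trim_len(unsigned int payload_len,
					  int has_jumbo, unsigned int jumbo_len)
	{
		unsigned int len = has_jumbo ? jumbo_len : payload_len;

		return IPV6_HDR_LEN + len;
	}

	int main(void)
	{
		printf("normal packet: trim to %u bytes\n",
		       ipv6_trim_len(1440, 0, 0));	/* 1480 */
		printf("jumbo packet:  trim to %u bytes\n",
		       ipv6_trim_len(0, 1, 100000));	/* 100040 */
		/* Without consulting the jumbo option the second case would be
		 * trimmed to 40 bytes, i.e. the bare IPv6 header, dropping all
		 * payload and extension headers.
		 */
		return 0;
	}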
Signed-off-by: Xin Long Reviewed-by: Simon Horman Reviewed-by: Nikolay Aleksandrov Reviewed-by: Aaron Conole Signed-off-by: Florian Westphal --- net/netfilter/nf_conntrack_ovs.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_ovs.c b/net/netfilter/nf_conntrack_ovs.c index 52b776bdf526d..068e9489e1c27 100644 --- a/net/netfilter/nf_conntrack_ovs.c +++ b/net/netfilter/nf_conntrack_ovs.c @@ -6,6 +6,7 @@ #include #include #include +#include /* 'skb' should already be pulled to nh_ofs. */ int nf_ct_helper(struct sk_buff *skb, struct nf_conn *ct, @@ -120,8 +121,14 @@ int nf_ct_skb_network_trim(struct sk_buff *skb, int family) len = skb_ip_totlen(skb); break; case NFPROTO_IPV6: - len = sizeof(struct ipv6hdr) - + ntohs(ipv6_hdr(skb)->payload_len); + len = ntohs(ipv6_hdr(skb)->payload_len); + if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP) { + int err = nf_ip6_check_hbh_len(skb, &len); + + if (err) + return err; + } + len += sizeof(struct ipv6hdr); break; default: len = skb->len; -- GitLab From 6bb382bcf742de5c17209848325653059c995a04 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 7 Mar 2023 16:31:32 -0500 Subject: [PATCH 0245/2078] selftests: add a selftest for big tcp This test runs on the client-router-server topo, and monitors the traffic on the RX devices of router and server while sending BIG TCP packets with netperf from client to server. Meanwhile, it changes 'tso' on the TX devs and 'gro' on the RX devs. Then it checks if any BIG TCP packets appears on the RX devs with 'ip/ip6tables -m length ! --length 0:65535' for each case. Note that we also add tc action ct in link1 ingress to cover the ipv6 jumbo packets process in nf_ct_skb_network_trim() of nf_conntrack_ovs. Signed-off-by: Xin Long Reviewed-by: Aaron Conole Reviewed-by: Nikolay Aleksandrov Signed-off-by: Florian Westphal --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/big_tcp.sh | 180 +++++++++++++++++++++++++ 2 files changed, 181 insertions(+) create mode 100755 tools/testing/selftests/net/big_tcp.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 6cd8993454d7e..099741290184e 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -48,6 +48,7 @@ TEST_PROGS += l2_tos_ttl_inherit.sh TEST_PROGS += bind_bhash.sh TEST_PROGS += ip_local_port_range.sh TEST_PROGS += rps_default_mask.sh +TEST_PROGS += big_tcp.sh TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh TEST_GEN_FILES = socket nettest diff --git a/tools/testing/selftests/net/big_tcp.sh b/tools/testing/selftests/net/big_tcp.sh new file mode 100755 index 0000000000000..cde9a91c47971 --- /dev/null +++ b/tools/testing/selftests/net/big_tcp.sh @@ -0,0 +1,180 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Testing For IPv4 and IPv6 BIG TCP. +# TOPO: CLIENT_NS (link0)<--->(link1) ROUTER_NS (link2)<--->(link3) SERVER_NS + +CLIENT_NS=$(mktemp -u client-XXXXXXXX) +CLIENT_IP4="198.51.100.1" +CLIENT_IP6="2001:db8:1::1" + +SERVER_NS=$(mktemp -u server-XXXXXXXX) +SERVER_IP4="203.0.113.1" +SERVER_IP6="2001:db8:2::1" + +ROUTER_NS=$(mktemp -u router-XXXXXXXX) +SERVER_GW4="203.0.113.2" +CLIENT_GW4="198.51.100.2" +SERVER_GW6="2001:db8:2::2" +CLIENT_GW6="2001:db8:1::2" + +MAX_SIZE=128000 +CHK_SIZE=65535 + +# Kselftest framework requirement - SKIP code is 4. 
+ksft_skip=4 + +setup() { + ip netns add $CLIENT_NS + ip netns add $SERVER_NS + ip netns add $ROUTER_NS + ip -net $ROUTER_NS link add link1 type veth peer name link0 netns $CLIENT_NS + ip -net $ROUTER_NS link add link2 type veth peer name link3 netns $SERVER_NS + + ip -net $CLIENT_NS link set link0 up + ip -net $CLIENT_NS link set link0 mtu 1442 + ip -net $CLIENT_NS addr add $CLIENT_IP4/24 dev link0 + ip -net $CLIENT_NS addr add $CLIENT_IP6/64 dev link0 nodad + ip -net $CLIENT_NS route add $SERVER_IP4 dev link0 via $CLIENT_GW4 + ip -net $CLIENT_NS route add $SERVER_IP6 dev link0 via $CLIENT_GW6 + ip -net $CLIENT_NS link set dev link0 \ + gro_ipv4_max_size $MAX_SIZE gso_ipv4_max_size $MAX_SIZE + ip -net $CLIENT_NS link set dev link0 \ + gro_max_size $MAX_SIZE gso_max_size $MAX_SIZE + ip net exec $CLIENT_NS sysctl -wq net.ipv4.tcp_window_scaling=10 + + ip -net $ROUTER_NS link set link1 up + ip -net $ROUTER_NS link set link2 up + ip -net $ROUTER_NS addr add $CLIENT_GW4/24 dev link1 + ip -net $ROUTER_NS addr add $CLIENT_GW6/64 dev link1 nodad + ip -net $ROUTER_NS addr add $SERVER_GW4/24 dev link2 + ip -net $ROUTER_NS addr add $SERVER_GW6/64 dev link2 nodad + ip -net $ROUTER_NS link set dev link1 \ + gro_ipv4_max_size $MAX_SIZE gso_ipv4_max_size $MAX_SIZE + ip -net $ROUTER_NS link set dev link2 \ + gro_ipv4_max_size $MAX_SIZE gso_ipv4_max_size $MAX_SIZE + ip -net $ROUTER_NS link set dev link1 \ + gro_max_size $MAX_SIZE gso_max_size $MAX_SIZE + ip -net $ROUTER_NS link set dev link2 \ + gro_max_size $MAX_SIZE gso_max_size $MAX_SIZE + # test for nf_ct_skb_network_trim in nf_conntrack_ovs used by TC ct action. + ip net exec $ROUTER_NS tc qdisc add dev link1 ingress + ip net exec $ROUTER_NS tc filter add dev link1 ingress \ + proto ip flower ip_proto tcp action ct + ip net exec $ROUTER_NS tc filter add dev link1 ingress \ + proto ipv6 flower ip_proto tcp action ct + ip net exec $ROUTER_NS sysctl -wq net.ipv4.ip_forward=1 + ip net exec $ROUTER_NS sysctl -wq net.ipv6.conf.all.forwarding=1 + + ip -net $SERVER_NS link set link3 up + ip -net $SERVER_NS addr add $SERVER_IP4/24 dev link3 + ip -net $SERVER_NS addr add $SERVER_IP6/64 dev link3 nodad + ip -net $SERVER_NS route add $CLIENT_IP4 dev link3 via $SERVER_GW4 + ip -net $SERVER_NS route add $CLIENT_IP6 dev link3 via $SERVER_GW6 + ip -net $SERVER_NS link set dev link3 \ + gro_ipv4_max_size $MAX_SIZE gso_ipv4_max_size $MAX_SIZE + ip -net $SERVER_NS link set dev link3 \ + gro_max_size $MAX_SIZE gso_max_size $MAX_SIZE + ip net exec $SERVER_NS sysctl -wq net.ipv4.tcp_window_scaling=10 + ip net exec $SERVER_NS netserver 2>&1 >/dev/null +} + +cleanup() { + ip net exec $SERVER_NS pkill netserver + ip -net $ROUTER_NS link del link1 + ip -net $ROUTER_NS link del link2 + ip netns del "$CLIENT_NS" + ip netns del "$SERVER_NS" + ip netns del "$ROUTER_NS" +} + +start_counter() { + local ipt="iptables" + local iface=$1 + local netns=$2 + + [ "$NF" = "6" ] && ipt="ip6tables" + ip net exec $netns $ipt -t raw -A PREROUTING -i $iface \ + -m length ! --length 0:$CHK_SIZE -j ACCEPT +} + +check_counter() { + local ipt="iptables" + local iface=$1 + local netns=$2 + + [ "$NF" = "6" ] && ipt="ip6tables" + test `ip net exec $netns $ipt -t raw -L -v |grep $iface | awk '{print $1}'` != "0" +} + +stop_counter() { + local ipt="iptables" + local iface=$1 + local netns=$2 + + [ "$NF" = "6" ] && ipt="ip6tables" + ip net exec $netns $ipt -t raw -D PREROUTING -i $iface \ + -m length ! 
--length 0:$CHK_SIZE -j ACCEPT +} + +do_netperf() { + local serip=$SERVER_IP4 + local netns=$1 + + [ "$NF" = "6" ] && serip=$SERVER_IP6 + ip net exec $netns netperf -$NF -t TCP_STREAM -H $serip 2>&1 >/dev/null +} + +do_test() { + local cli_tso=$1 + local gw_gro=$2 + local gw_tso=$3 + local ser_gro=$4 + local ret="PASS" + + ip net exec $CLIENT_NS ethtool -K link0 tso $cli_tso + ip net exec $ROUTER_NS ethtool -K link1 gro $gw_gro + ip net exec $ROUTER_NS ethtool -K link2 tso $gw_tso + ip net exec $SERVER_NS ethtool -K link3 gro $ser_gro + + start_counter link1 $ROUTER_NS + start_counter link3 $SERVER_NS + do_netperf $CLIENT_NS + + if check_counter link1 $ROUTER_NS; then + check_counter link3 $SERVER_NS || ret="FAIL_on_link3" + else + ret="FAIL_on_link1" + fi + + stop_counter link1 $ROUTER_NS + stop_counter link3 $SERVER_NS + printf "%-9s %-8s %-8s %-8s: [%s]\n" \ + $cli_tso $gw_gro $gw_tso $ser_gro $ret + test $ret = "PASS" +} + +testup() { + echo "CLI GSO | GW GRO | GW GSO | SER GRO" && \ + do_test "on" "on" "on" "on" && \ + do_test "on" "off" "on" "off" && \ + do_test "off" "on" "on" "on" && \ + do_test "on" "on" "off" "on" && \ + do_test "off" "on" "off" "on" +} + +if ! netperf -V &> /dev/null; then + echo "SKIP: Could not run test without netperf tool" + exit $ksft_skip +fi + +if ! ip link help 2>&1 | grep gso_ipv4_max_size &> /dev/null; then + echo "SKIP: Could not run test without gso/gro_ipv4_max_size supported in ip-link" + exit $ksft_skip +fi + +trap cleanup EXIT +setup && echo "Testing for BIG TCP:" && \ +NF=4 testup && echo "***v4 Tests Done***" && \ +NF=6 testup && echo "***v6 Tests Done***" +exit $? -- GitLab From e5d015a114dad6a50c22cf282007f7d92086df46 Mon Sep 17 00:00:00 2001 From: Jeremy Sowden Date: Tue, 7 Mar 2023 23:30:48 +0000 Subject: [PATCH 0246/2078] netfilter: conntrack: fix typo There's a spelling mistake in a comment. Fix it. Signed-off-by: Jeremy Sowden Signed-off-by: Florian Westphal --- net/netfilter/nf_conntrack_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 7250082e7de56..004c54132a3b5 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1294,7 +1294,7 @@ dying: } EXPORT_SYMBOL_GPL(__nf_conntrack_confirm); -/* Returns true if a connection correspondings to the tuple (required +/* Returns true if a connection corresponds to the tuple (required for NAT). */ int nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, -- GitLab From b0ca200077b3872056e6a8291c9a50f803658c2a Mon Sep 17 00:00:00 2001 From: Jeremy Sowden Date: Tue, 7 Mar 2023 23:30:49 +0000 Subject: [PATCH 0247/2078] netfilter: nat: fix indentation of function arguments A couple of arguments to a function call are incorrectly indented. Fix them. 
Signed-off-by: Jeremy Sowden Signed-off-by: Florian Westphal --- net/netfilter/nf_nat_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index e29e4ccb5c5a3..ce829d434f138 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -549,8 +549,8 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) { if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) && l4proto_in_range(tuple, maniptype, - &range->min_proto, - &range->max_proto) && + &range->min_proto, + &range->max_proto) && (range->min_proto.all == range->max_proto.all || !nf_nat_used_tuple(tuple, ct))) return; -- GitLab From 12fabae03ca6474fd571bf6ddb37d009533305d6 Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Wed, 8 Mar 2023 11:37:13 +0100 Subject: [PATCH 0248/2078] selftests/bpf: Fix IMA test Commit 62622dab0a28 ("ima: return IMA digest value only when IMA_COLLECTED flag is set") caused bpf_ima_inode_hash() to refuse to give non-fresh digests. IMA test #3 assumed the old behavior, that bpf_ima_inode_hash() still returned also non-fresh digests. Correct the test by accepting both cases. If the samples returned are 1, assume that the commit above is applied and that the returned digest is fresh. If the samples returned are 2, assume that the commit above is not applied, and check both the non-fresh and fresh digest. Fixes: 62622dab0a28 ("ima: return IMA digest value only when IMA_COLLECTED flag is set") Reported-by: David Vernet Signed-off-by: Roberto Sassu Signed-off-by: Andrii Nakryiko Reviewed-by: Matt Bobrowski Link: https://lore.kernel.org/bpf/20230308103713.1681200-1-roberto.sassu@huaweicloud.com --- .../selftests/bpf/prog_tests/test_ima.c | 29 ++++++++++++++----- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/test_ima.c b/tools/testing/selftests/bpf/prog_tests/test_ima.c index b13feceb38f1a..810b14981c2eb 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_ima.c +++ b/tools/testing/selftests/bpf/prog_tests/test_ima.c @@ -70,7 +70,7 @@ void test_test_ima(void) u64 bin_true_sample; char cmd[256]; - int err, duration = 0; + int err, duration = 0, fresh_digest_idx = 0; struct ima *skel = NULL; skel = ima__open_and_load(); @@ -129,7 +129,15 @@ void test_test_ima(void) /* * Test #3 * - Goal: confirm that bpf_ima_inode_hash() returns a non-fresh digest - * - Expected result: 2 samples (/bin/true: non-fresh, fresh) + * - Expected result: + * 1 sample (/bin/true: fresh) if commit 62622dab0a28 applied + * 2 samples (/bin/true: non-fresh, fresh) if commit 62622dab0a28 is + * not applied + * + * If commit 62622dab0a28 ("ima: return IMA digest value only when + * IMA_COLLECTED flag is set") is applied, bpf_ima_inode_hash() refuses + * to give a non-fresh digest, hence the correct result is 1 instead of + * 2. 
*/ test_init(skel->bss); @@ -144,13 +152,18 @@ void test_test_ima(void) goto close_clean; err = ring_buffer__consume(ringbuf); - ASSERT_EQ(err, 2, "num_samples_or_err"); - ASSERT_NEQ(ima_hash_from_bpf[0], 0, "ima_hash"); - ASSERT_NEQ(ima_hash_from_bpf[1], 0, "ima_hash"); - ASSERT_EQ(ima_hash_from_bpf[0], bin_true_sample, "sample_equal_or_err"); + ASSERT_GE(err, 1, "num_samples_or_err"); + if (err == 2) { + ASSERT_NEQ(ima_hash_from_bpf[0], 0, "ima_hash"); + ASSERT_EQ(ima_hash_from_bpf[0], bin_true_sample, + "sample_equal_or_err"); + fresh_digest_idx = 1; + } + + ASSERT_NEQ(ima_hash_from_bpf[fresh_digest_idx], 0, "ima_hash"); /* IMA refreshed the digest. */ - ASSERT_NEQ(ima_hash_from_bpf[1], bin_true_sample, - "sample_different_or_err"); + ASSERT_NEQ(ima_hash_from_bpf[fresh_digest_idx], bin_true_sample, + "sample_equal_or_err"); /* * Test #4 -- GitLab From 07236eab7a3139da97aef9f5f21f403be82a82ea Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 8 Mar 2023 10:41:14 -0800 Subject: [PATCH 0249/2078] bpf: factor out fetching basic kfunc metadata Factor out logic to fetch basic kfunc metadata based on struct bpf_insn. This is not exactly short or trivial code to just copy/paste and this information is sometimes necessary in other parts of the verifier logic. Subsequent patches will rely on this to determine if an instruction is a kfunc call to iterator next method. No functional changes intended, including that verbose() warning behavior when kfunc is not allowed for a particular program type. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230308184121.1165081-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 92 +++++++++++++++++++++++++++---------------- 1 file changed, 59 insertions(+), 33 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b2116ca78d9a3..8d40fba6a1c02 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -10079,24 +10079,21 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ return 0; } -static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, - int *insn_idx_p) +static int fetch_kfunc_meta(struct bpf_verifier_env *env, + struct bpf_insn *insn, + struct bpf_kfunc_call_arg_meta *meta, + const char **kfunc_name) { - const struct btf_type *t, *func, *func_proto, *ptr_type; - u32 i, nargs, func_id, ptr_type_id, release_ref_obj_id; - struct bpf_reg_state *regs = cur_regs(env); - const char *func_name, *ptr_type_name; - bool sleepable, rcu_lock, rcu_unlock; - struct bpf_kfunc_call_arg_meta meta; - int err, insn_idx = *insn_idx_p; - const struct btf_param *args; - const struct btf_type *ret_t; + const struct btf_type *func, *func_proto; + u32 func_id, *kfunc_flags; + const char *func_name; struct btf *desc_btf; - u32 *kfunc_flags; - /* skip for now, but return error when we find this in fixup_kfunc_call */ + if (kfunc_name) + *kfunc_name = NULL; + if (!insn->imm) - return 0; + return -EINVAL; desc_btf = find_kfunc_desc_btf(env, insn->off); if (IS_ERR(desc_btf)) @@ -10105,22 +10102,51 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, func_id = insn->imm; func = btf_type_by_id(desc_btf, func_id); func_name = btf_name_by_offset(desc_btf, func->name_off); + if (kfunc_name) + *kfunc_name = func_name; func_proto = btf_type_by_id(desc_btf, func->type); kfunc_flags = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), func_id); if (!kfunc_flags) { - verbose(env, "calling kernel function %s is 
not allowed\n", - func_name); return -EACCES; } - /* Prepare kfunc call metadata */ - memset(&meta, 0, sizeof(meta)); - meta.btf = desc_btf; - meta.func_id = func_id; - meta.kfunc_flags = *kfunc_flags; - meta.func_proto = func_proto; - meta.func_name = func_name; + memset(meta, 0, sizeof(*meta)); + meta->btf = desc_btf; + meta->func_id = func_id; + meta->kfunc_flags = *kfunc_flags; + meta->func_proto = func_proto; + meta->func_name = func_name; + + return 0; +} + +static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, + int *insn_idx_p) +{ + const struct btf_type *t, *ptr_type; + u32 i, nargs, ptr_type_id, release_ref_obj_id; + struct bpf_reg_state *regs = cur_regs(env); + const char *func_name, *ptr_type_name; + bool sleepable, rcu_lock, rcu_unlock; + struct bpf_kfunc_call_arg_meta meta; + struct bpf_insn_aux_data *insn_aux; + int err, insn_idx = *insn_idx_p; + const struct btf_param *args; + const struct btf_type *ret_t; + struct btf *desc_btf; + + /* skip for now, but return error when we find this in fixup_kfunc_call */ + if (!insn->imm) + return 0; + + err = fetch_kfunc_meta(env, insn, &meta, &func_name); + if (err == -EACCES && func_name) + verbose(env, "calling kernel function %s is not allowed\n", func_name); + if (err) + return err; + desc_btf = meta.btf; + insn_aux = &env->insn_aux_data[insn_idx]; if (is_kfunc_destructive(&meta) && !capable(CAP_SYS_BOOT)) { verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capability\n"); @@ -10173,7 +10199,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, err = release_reference(env, regs[meta.release_regno].ref_obj_id); if (err) { verbose(env, "kfunc %s#%d reference has not been acquired before\n", - func_name, func_id); + func_name, meta.func_id); return err; } } @@ -10185,14 +10211,14 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, err = ref_convert_owning_non_owning(env, release_ref_obj_id); if (err) { verbose(env, "kfunc %s#%d conversion of owning ref to non-owning failed\n", - func_name, func_id); + func_name, meta.func_id); return err; } err = release_reference(env, release_ref_obj_id); if (err) { verbose(env, "kfunc %s#%d reference has not been acquired before\n", - func_name, func_id); + func_name, meta.func_id); return err; } } @@ -10202,7 +10228,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, set_rbtree_add_callback_state); if (err) { verbose(env, "kfunc %s#%d failed callback verification\n", - func_name, func_id); + func_name, meta.func_id); return err; } } @@ -10211,7 +10237,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, mark_reg_not_init(env, regs, caller_saved[i]); /* Check return type */ - t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL); + t = btf_type_skip_modifiers(desc_btf, meta.func_proto->type, NULL); if (is_kfunc_acquire(&meta) && !btf_type_is_struct_ptr(meta.btf, t)) { /* Only exception is bpf_obj_new_impl */ @@ -10260,11 +10286,11 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, regs[BPF_REG_0].btf = ret_btf; regs[BPF_REG_0].btf_id = ret_btf_id; - env->insn_aux_data[insn_idx].obj_new_size = ret_t->size; - env->insn_aux_data[insn_idx].kptr_struct_meta = + insn_aux->obj_new_size = ret_t->size; + insn_aux->kptr_struct_meta = btf_find_struct_meta(ret_btf, ret_btf_id); } else if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) { - env->insn_aux_data[insn_idx].kptr_struct_meta = + 
insn_aux->kptr_struct_meta = btf_find_struct_meta(meta.arg_obj_drop.btf, meta.arg_obj_drop.btf_id); } else if (meta.func_id == special_kfunc_list[KF_bpf_list_pop_front] || @@ -10397,8 +10423,8 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, regs[BPF_REG_0].id = ++env->id_gen; } /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */ - nargs = btf_type_vlen(func_proto); - args = (const struct btf_param *)(func_proto + 1); + nargs = btf_type_vlen(meta.func_proto); + args = (const struct btf_param *)(meta.func_proto + 1); for (i = 0; i < nargs; i++) { u32 regno = i + 1; -- GitLab From 215bf4962f6c9605710012fad222a5fec001b3ad Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 8 Mar 2023 10:41:15 -0800 Subject: [PATCH 0250/2078] bpf: add iterator kfuncs registration and validation logic Add ability to register kfuncs that implement BPF open-coded iterator contract and enforce naming and function proto convention. Enforcement happens at the time of kfunc registration and significantly simplifies the rest of iterators logic in the verifier. More details follow in subsequent patches, but we enforce the following conditions. All kfuncs (constructor, next, destructor) have to be named consistenly as bpf_iter__{new,next,destroy}(), respectively. represents iterator type, and iterator state should be represented as a matching `struct bpf_iter_` state type. Also, all iter kfuncs should have a pointer to this `struct bpf_iter_` as the very first argument. Additionally: - Constructor, i.e., bpf_iter__new(), can have arbitrary extra number of arguments. Return type is not enforced either. - Next method, i.e., bpf_iter__next(), has to return a pointer type and should have exactly one argument: `struct bpf_iter_ *` (const/volatile/restrict and typedefs are ignored). - Destructor, i.e., bpf_iter__destroy(), should return void and should have exactly one argument, similar to the next method. - struct bpf_iter_ size is enforced to be positive and a multiple of 8 bytes (to fit stack slots correctly). Such strictness and consistency allows to build generic helpers abstracting important, but boilerplate, details to be able to use open-coded iterators effectively and ergonomically (see bpf_for_each() in subsequent patches). It also simplifies the verifier logic in some places. At the same time, this doesn't hurt generality of possible iterator implementations. Win-win. Constructor kfunc is marked with a new KF_ITER_NEW flags, next method is marked with KF_ITER_NEXT (and should also have KF_RET_NULL, of course), while destructor kfunc is marked as KF_ITER_DESTROY. Additionally, we add a trivial kfunc name validation: it should be a valid non-NULL and non-empty string. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230308184121.1165081-3-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 2 + include/linux/btf.h | 4 ++ kernel/bpf/btf.c | 112 ++++++++++++++++++++++++++++++++++- 3 files changed, 117 insertions(+), 1 deletion(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 18538bad2b8c7..e2dc7f0644495 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -59,6 +59,8 @@ struct bpf_active_lock { u32 id; }; +#define ITER_PREFIX "bpf_iter_" + struct bpf_reg_state { /* Ordering of fields matters. 
See states_equal() */ enum bpf_reg_type type; diff --git a/include/linux/btf.h b/include/linux/btf.h index 556b3e2e7471e..1bba0827e8c46 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -71,6 +71,10 @@ #define KF_SLEEPABLE (1 << 5) /* kfunc may sleep */ #define KF_DESTRUCTIVE (1 << 6) /* kfunc performs destructive actions */ #define KF_RCU (1 << 7) /* kfunc takes either rcu or trusted pointer arguments */ +/* only one of KF_ITER_{NEW,NEXT,DESTROY} could be specified per kfunc */ +#define KF_ITER_NEW (1 << 8) /* kfunc implements BPF iter constructor */ +#define KF_ITER_NEXT (1 << 9) /* kfunc implements BPF iter next method */ +#define KF_ITER_DESTROY (1 << 10) /* kfunc implements BPF iter destructor */ /* * Tag marking a kernel function as a kfunc. This is meant to minimize the diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index a8cb09e5973b7..71758cd15b070 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -7596,6 +7596,108 @@ BTF_ID_LIST_GLOBAL(btf_tracing_ids, MAX_BTF_TRACING_TYPE) BTF_TRACING_TYPE_xxx #undef BTF_TRACING_TYPE +static int btf_check_iter_kfuncs(struct btf *btf, const char *func_name, + const struct btf_type *func, u32 func_flags) +{ + u32 flags = func_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY); + const char *name, *sfx, *iter_name; + const struct btf_param *arg; + const struct btf_type *t; + char exp_name[128]; + u32 nr_args; + + /* exactly one of KF_ITER_{NEW,NEXT,DESTROY} can be set */ + if (!flags || (flags & (flags - 1))) + return -EINVAL; + + /* any BPF iter kfunc should have `struct bpf_iter_ *` first arg */ + nr_args = btf_type_vlen(func); + if (nr_args < 1) + return -EINVAL; + + arg = &btf_params(func)[0]; + t = btf_type_skip_modifiers(btf, arg->type, NULL); + if (!t || !btf_type_is_ptr(t)) + return -EINVAL; + t = btf_type_skip_modifiers(btf, t->type, NULL); + if (!t || !__btf_type_is_struct(t)) + return -EINVAL; + + name = btf_name_by_offset(btf, t->name_off); + if (!name || strncmp(name, ITER_PREFIX, sizeof(ITER_PREFIX) - 1)) + return -EINVAL; + + /* sizeof(struct bpf_iter_) should be a multiple of 8 to + * fit nicely in stack slots + */ + if (t->size == 0 || (t->size % 8)) + return -EINVAL; + + /* validate bpf_iter__{new,next,destroy}(struct bpf_iter_ *) + * naming pattern + */ + iter_name = name + sizeof(ITER_PREFIX) - 1; + if (flags & KF_ITER_NEW) + sfx = "new"; + else if (flags & KF_ITER_NEXT) + sfx = "next"; + else /* (flags & KF_ITER_DESTROY) */ + sfx = "destroy"; + + snprintf(exp_name, sizeof(exp_name), "bpf_iter_%s_%s", iter_name, sfx); + if (strcmp(func_name, exp_name)) + return -EINVAL; + + /* only iter constructor should have extra arguments */ + if (!(flags & KF_ITER_NEW) && nr_args != 1) + return -EINVAL; + + if (flags & KF_ITER_NEXT) { + /* bpf_iter__next() should return pointer */ + t = btf_type_skip_modifiers(btf, func->type, NULL); + if (!t || !btf_type_is_ptr(t)) + return -EINVAL; + } + + if (flags & KF_ITER_DESTROY) { + /* bpf_iter__destroy() should return void */ + t = btf_type_by_id(btf, func->type); + if (!t || !btf_type_is_void(t)) + return -EINVAL; + } + + return 0; +} + +static int btf_check_kfunc_protos(struct btf *btf, u32 func_id, u32 func_flags) +{ + const struct btf_type *func; + const char *func_name; + int err; + + /* any kfunc should be FUNC -> FUNC_PROTO */ + func = btf_type_by_id(btf, func_id); + if (!func || !btf_type_is_func(func)) + return -EINVAL; + + /* sanity check kfunc name */ + func_name = btf_name_by_offset(btf, func->name_off); + if (!func_name || !func_name[0]) + return -EINVAL; + + 
func = btf_type_by_id(btf, func->type); + if (!func || !btf_type_is_func_proto(func)) + return -EINVAL; + + if (func_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY)) { + err = btf_check_iter_kfuncs(btf, func_name, func, func_flags); + if (err) + return err; + } + + return 0; +} + /* Kernel Function (kfunc) BTF ID set registration API */ static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, @@ -7772,7 +7874,7 @@ static int __register_btf_kfunc_id_set(enum btf_kfunc_hook hook, const struct btf_kfunc_id_set *kset) { struct btf *btf; - int ret; + int ret, i; btf = btf_get_module_btf(kset->owner); if (!btf) { @@ -7789,7 +7891,15 @@ static int __register_btf_kfunc_id_set(enum btf_kfunc_hook hook, if (IS_ERR(btf)) return PTR_ERR(btf); + for (i = 0; i < kset->set->cnt; i++) { + ret = btf_check_kfunc_protos(btf, kset->set->pairs[i].id, + kset->set->pairs[i].flags); + if (ret) + goto err_out; + } + ret = btf_populate_kfunc_set(btf, hook, kset->set); +err_out: btf_put(btf); return ret; } -- GitLab From 06accc8779c1d558a5b5a21f2ac82b0c95827ddd Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 8 Mar 2023 10:41:16 -0800 Subject: [PATCH 0251/2078] bpf: add support for open-coded iterator loops Teach verifier about the concept of the open-coded (or inline) iterators. This patch adds generic iterator loop verification logic, new STACK_ITER stack slot type to contain iterator state, and necessary kfunc plumbing for iterator's constructor, destructor and next methods. Next patch implements first specific iterator (numbers iterator for implementing for() loop logic). Such split allows to have more focused commits for verifier logic and separate commit that we could point later to demonstrating what does it take to add a new kind of iterator. Each kind of iterator has its own associated struct bpf_iter_, where denotes a specific type of iterator. struct bpf_iter_ state is supposed to live on BPF program stack, so there will be no way to change its size later on without breaking backwards compatibility, so choose wisely! But given this struct is specific to a given of iterator, this allows a lot of flexibility: simple iterators could be fine with just one stack slot (8 bytes), like numbers iterator in the next patch, while some other more complicated iterators might need way more to keep their iterator state. Either way, such design allows to avoid runtime memory allocations, which otherwise would be necessary if we fixed on-the-stack size and it turned out to be too small for a given iterator implementation. The way BPF verifier logic is implemented, there are no artificial restrictions on a number of active iterators, it should work correctly using multiple active iterators at the same time. This also means you can have multiple nested iteration loops. struct bpf_iter_ reference can be safely passed to subprograms as well. General flow is easiest to demonstrate with a simple example using number iterator implemented in next patch. Here's the simplest possible loop: struct bpf_iter_num it; int *v; bpf_iter_num_new(&it, 2, 5); while ((v = bpf_iter_num_next(&it))) { bpf_printk("X = %d", *v); } bpf_iter_num_destroy(&it); Above snippet should output "X = 2", "X = 3", "X = 4". Note that 5 is exclusive and is not returned. This matches similar APIs (e.g., slices in Go or Rust) that implement a range of elements, where end index is non-inclusive. 
In the above example, we see a trio of functions:

  - constructor, bpf_iter_num_new(), which initializes iterator state
    (struct bpf_iter_num it) on the stack. If any of the input arguments
    are invalid, the constructor should make sure to still initialize it
    such that subsequent bpf_iter_num_next() calls will return NULL. I.e.,
    on error, return error and construct an empty iterator.

  - next method, bpf_iter_num_next(), which accepts a pointer to iterator
    state and produces an element. The next method should always return a
    pointer. The contract with the BPF verifier is that the next method
    will always eventually return NULL when elements are exhausted. Once
    NULL is returned, subsequent next calls should keep returning NULL. In
    the case of the numbers iterator, bpf_iter_num_next() returns a pointer
    to an int (storage for this integer is inside the iterator state
    itself), which can be dereferenced after the corresponding NULL check.

  - once done with the iterator, it's mandated that the user cleans up its
    state with a call to the destructor, bpf_iter_num_destroy() in this
    case. The destructor frees up any resources and marks the stack space
    used by struct bpf_iter_num as usable for something else.

Any other iterator implementation will have to implement at least these
three methods. It is enforced that for any given type of iterator only the
applicable constructor/destructor/next are callable. I.e., the verifier
ensures you can't pass number iterator state into, say, the cgroup
iterator's next method.

It is important to keep the naming pattern consistent to be able to create
generic macros to help with BPF iter usability. E.g., one of the follow-up
patches adds a generic bpf_for_each() macro to bpf_misc.h in selftests,
which allows utilizing the iterator "trio" nicely without having to code
the above somewhat tedious loop explicitly every time. This is enforced at
kfunc registration point by one of the previous patches in this series.

At the implementation level, iterator state tracking for verification
purposes is very similar to dynptr. We add a STACK_ITER stack slot type,
reserve the necessary number of slots, depending on
sizeof(struct bpf_iter_<type>), and keep track of necessary extra state in
the "main" slot, which is marked with a non-zero ref_obj_id. Other slots
are also marked as STACK_ITER, but have zero ref_obj_id. This is simpler
than having a separate "is_first_slot" flag.

Another big distinction is that STACK_ITER is *always refcounted*, which
simplifies the implementation without sacrificing usability. So no need
for an extra "iter_id", no need to anticipate reuse of STACK_ITER slots
for new constructors, etc. Keeping it simple here.

As far as the verification logic goes, there are two extensive comments:
in process_iter_next_call() and iter_active_depths_differ() explaining
some important and sometimes subtle aspects. Please refer to them for
details. But from a 10,000-foot point of view, next methods are the points
of forking a verification state, which are conceptually similar to what
the verifier is doing when validating a conditional jump. We branch out at
a `call bpf_iter_<type>_next` instruction and simulate two outcomes: NULL
(iteration is done) and non-NULL (a new element is returned). NULL is
simulated first and is supposed to reach exit without looping. After that
the non-NULL case is validated and it either reaches exit (for trivial
examples with no real loop), or reaches another `call bpf_iter_<type>_next`
instruction with a state equivalent to an already (partially) validated one.
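Sketched in BPF C terms (a sketch using the numbers iterator added in the next patch; the comments describe the verifier's simulation and are illustrative, not part of any real program):

	struct bpf_iter_num it;
	int *v, sum = 0;

	bpf_iter_num_new(&it, 0, 10);
	while ((v = bpf_iter_num_next(&it))) {
		/* forked state: iterator stays ACTIVE, depth is bumped, and
		 * verification eventually loops back to the next-call above,
		 * ideally with an equivalent state
		 */
		sum += *v;
	}
	/* current state: iterator marked DRAINED, v == NULL, verification
	 * falls through here and must reach exit without looping
	 */
	bpf_iter_num_destroy(&it);

The looping-back case is the one where the verifier reaches the next-call again with an equivalent state.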
State equivalency at that point means we technically are going to be looping forever without "breaking out" out of established "state envelope" (i.e., subsequent iterations don't add any new knowledge or constraints to the verifier state, so running 1, 2, 10, or a million of them doesn't matter). But taking into account the contract stating that iterator next method *has to* return NULL eventually, we can conclude that loop body is safe and will eventually terminate. Given we validated logic outside of the loop (NULL case), and concluded that loop body is safe (though potentially looping many times), verifier can claim safety of the overall program logic. The rest of the patch is necessary plumbing for state tracking, marking, validation, and necessary further kfunc plumbing to allow implementing iterator constructor, destructor, and next methods. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230308184121.1165081-4-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 23 ++ kernel/bpf/verifier.c | 595 ++++++++++++++++++++++++++++++++++- 2 files changed, 610 insertions(+), 8 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index e2dc7f0644495..0c052bc799401 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -61,6 +61,12 @@ struct bpf_active_lock { #define ITER_PREFIX "bpf_iter_" +enum bpf_iter_state { + BPF_ITER_STATE_INVALID, /* for non-first slot */ + BPF_ITER_STATE_ACTIVE, + BPF_ITER_STATE_DRAINED, +}; + struct bpf_reg_state { /* Ordering of fields matters. See states_equal() */ enum bpf_reg_type type; @@ -105,6 +111,18 @@ struct bpf_reg_state { bool first_slot; } dynptr; + /* For bpf_iter stack slots */ + struct { + /* BTF container and BTF type ID describing + * struct bpf_iter_ of an iterator state + */ + struct btf *btf; + u32 btf_id; + /* packing following two fields to fit iter state into 16 bytes */ + enum bpf_iter_state state:2; + int depth:30; + } iter; + /* Max size from any of the above. */ struct { unsigned long raw1; @@ -143,6 +161,8 @@ struct bpf_reg_state { * same reference to the socket, to determine proper reference freeing. * For stack slots that are dynptrs, this is used to track references to * the dynptr to determine proper reference freeing. + * Similarly to dynptrs, we use ID to track "belonging" of a reference + * to a specific instance of bpf_iter. 
*/ u32 id; /* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned @@ -213,9 +233,11 @@ enum bpf_stack_slot_type { * is stored in bpf_stack_state->spilled_ptr.dynptr.type */ STACK_DYNPTR, + STACK_ITER, }; #define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ + #define BPF_DYNPTR_SIZE sizeof(struct bpf_dynptr_kern) #define BPF_DYNPTR_NR_SLOTS (BPF_DYNPTR_SIZE / BPF_REG_SIZE) @@ -450,6 +472,7 @@ struct bpf_insn_aux_data { bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */ bool zext_dst; /* this insn zero extends dst reg */ bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */ + bool is_iter_next; /* bpf_iter__next() kfunc call */ u8 alu_state; /* used in combination with alu_limit */ /* below fields are initialized once */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8d40fba6a1c02..45a0822844648 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -302,6 +302,10 @@ struct bpf_kfunc_call_arg_meta { enum bpf_dynptr_type type; u32 id; } initialized_dynptr; + struct { + u8 spi; + u8 frameno; + } iter; u64 mem_size; }; @@ -668,6 +672,7 @@ static char slot_type_char[] = { [STACK_MISC] = 'm', [STACK_ZERO] = '0', [STACK_DYNPTR] = 'd', + [STACK_ITER] = 'i', }; static void print_liveness(struct bpf_verifier_env *env, @@ -742,6 +747,11 @@ static int dynptr_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *re return stack_slot_obj_get_spi(env, reg, "dynptr", BPF_DYNPTR_NR_SLOTS); } +static int iter_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int nr_slots) +{ + return stack_slot_obj_get_spi(env, reg, "iter", nr_slots); +} + static const char *kernel_type_name(const struct btf* btf, u32 id) { return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off); @@ -766,6 +776,30 @@ static const char *dynptr_type_str(enum bpf_dynptr_type type) } } +static const char *iter_type_str(const struct btf *btf, u32 btf_id) +{ + if (!btf || btf_id == 0) + return ""; + + /* we already validated that type is valid and has conforming name */ + return kernel_type_name(btf, btf_id) + sizeof(ITER_PREFIX) - 1; +} + +static const char *iter_state_str(enum bpf_iter_state state) +{ + switch (state) { + case BPF_ITER_STATE_ACTIVE: + return "active"; + case BPF_ITER_STATE_DRAINED: + return "drained"; + case BPF_ITER_STATE_INVALID: + return ""; + default: + WARN_ONCE(1, "unknown iter state %d\n", state); + return ""; + } +} + static void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno) { env->scratched_regs |= 1U << regno; @@ -1118,6 +1152,157 @@ static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg } } +static void __mark_reg_known_zero(struct bpf_reg_state *reg); + +static int mark_stack_slots_iter(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, int insn_idx, + struct btf *btf, u32 btf_id, int nr_slots) +{ + struct bpf_func_state *state = func(env, reg); + int spi, i, j, id; + + spi = iter_get_spi(env, reg, nr_slots); + if (spi < 0) + return spi; + + id = acquire_reference_state(env, insn_idx); + if (id < 0) + return id; + + for (i = 0; i < nr_slots; i++) { + struct bpf_stack_state *slot = &state->stack[spi - i]; + struct bpf_reg_state *st = &slot->spilled_ptr; + + __mark_reg_known_zero(st); + st->type = PTR_TO_STACK; /* we don't have dedicated reg type */ + st->live |= REG_LIVE_WRITTEN; + st->ref_obj_id = i == 0 ? 
id : 0; + st->iter.btf = btf; + st->iter.btf_id = btf_id; + st->iter.state = BPF_ITER_STATE_ACTIVE; + st->iter.depth = 0; + + for (j = 0; j < BPF_REG_SIZE; j++) + slot->slot_type[j] = STACK_ITER; + + mark_stack_slot_scratched(env, spi - i); + } + + return 0; +} + +static int unmark_stack_slots_iter(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, int nr_slots) +{ + struct bpf_func_state *state = func(env, reg); + int spi, i, j; + + spi = iter_get_spi(env, reg, nr_slots); + if (spi < 0) + return spi; + + for (i = 0; i < nr_slots; i++) { + struct bpf_stack_state *slot = &state->stack[spi - i]; + struct bpf_reg_state *st = &slot->spilled_ptr; + + if (i == 0) + WARN_ON_ONCE(release_reference(env, st->ref_obj_id)); + + __mark_reg_not_init(env, st); + + /* see unmark_stack_slots_dynptr() for why we need to set REG_LIVE_WRITTEN */ + st->live |= REG_LIVE_WRITTEN; + + for (j = 0; j < BPF_REG_SIZE; j++) + slot->slot_type[j] = STACK_INVALID; + + mark_stack_slot_scratched(env, spi - i); + } + + return 0; +} + +static bool is_iter_reg_valid_uninit(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, int nr_slots) +{ + struct bpf_func_state *state = func(env, reg); + int spi, i, j; + + /* For -ERANGE (i.e. spi not falling into allocated stack slots), we + * will do check_mem_access to check and update stack bounds later, so + * return true for that case. + */ + spi = iter_get_spi(env, reg, nr_slots); + if (spi == -ERANGE) + return true; + if (spi < 0) + return false; + + for (i = 0; i < nr_slots; i++) { + struct bpf_stack_state *slot = &state->stack[spi - i]; + + for (j = 0; j < BPF_REG_SIZE; j++) + if (slot->slot_type[j] == STACK_ITER) + return false; + } + + return true; +} + +static bool is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg, + struct btf *btf, u32 btf_id, int nr_slots) +{ + struct bpf_func_state *state = func(env, reg); + int spi, i, j; + + spi = iter_get_spi(env, reg, nr_slots); + if (spi < 0) + return false; + + for (i = 0; i < nr_slots; i++) { + struct bpf_stack_state *slot = &state->stack[spi - i]; + struct bpf_reg_state *st = &slot->spilled_ptr; + + /* only main (first) slot has ref_obj_id set */ + if (i == 0 && !st->ref_obj_id) + return false; + if (i != 0 && st->ref_obj_id) + return false; + if (st->iter.btf != btf || st->iter.btf_id != btf_id) + return false; + + for (j = 0; j < BPF_REG_SIZE; j++) + if (slot->slot_type[j] != STACK_ITER) + return false; + } + + return true; +} + +/* Check if given stack slot is "special": + * - spilled register state (STACK_SPILL); + * - dynptr state (STACK_DYNPTR); + * - iter state (STACK_ITER). + */ +static bool is_stack_slot_special(const struct bpf_stack_state *stack) +{ + enum bpf_stack_slot_type type = stack->slot_type[BPF_REG_SIZE - 1]; + + switch (type) { + case STACK_SPILL: + case STACK_DYNPTR: + case STACK_ITER: + return true; + case STACK_INVALID: + case STACK_MISC: + case STACK_ZERO: + return false; + default: + WARN_ONCE(1, "unknown stack slot type %d\n", type); + return true; + } +} + /* The reg state of a pointer or a bounded scalar was saved when * it was spilled to the stack. 
*/ @@ -1267,6 +1452,19 @@ static void print_verifier_state(struct bpf_verifier_env *env, if (reg->ref_obj_id) verbose(env, "(ref_id=%d)", reg->ref_obj_id); break; + case STACK_ITER: + /* only main slot has ref_obj_id set; skip others */ + reg = &state->stack[i].spilled_ptr; + if (!reg->ref_obj_id) + continue; + + verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); + print_liveness(env, reg->live); + verbose(env, "=iter_%s(ref_id=%d,state=%s,depth=%u)", + iter_type_str(reg->iter.btf, reg->iter.btf_id), + reg->ref_obj_id, iter_state_str(reg->iter.state), + reg->iter.depth); + break; case STACK_MISC: case STACK_ZERO: default: @@ -2710,6 +2908,25 @@ static int mark_dynptr_read(struct bpf_verifier_env *env, struct bpf_reg_state * state->stack[spi - 1].spilled_ptr.parent, REG_LIVE_READ64); } +static int mark_iter_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg, + int spi, int nr_slots) +{ + struct bpf_func_state *state = func(env, reg); + int err, i; + + for (i = 0; i < nr_slots; i++) { + struct bpf_reg_state *st = &state->stack[spi - i].spilled_ptr; + + err = mark_reg_read(env, st, st->parent, REG_LIVE_READ64); + if (err) + return err; + + mark_stack_slot_scratched(env, spi - i); + } + + return 0; +} + /* This function is supposed to be used by the following 32-bit optimization * code only. It returns TRUE if the source or destination register operates * on 64-bit, otherwise return FALSE. @@ -3691,8 +3908,8 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, /* regular write of data into stack destroys any spilled ptr */ state->stack[spi].spilled_ptr.type = NOT_INIT; - /* Mark slots as STACK_MISC if they belonged to spilled ptr. */ - if (is_spilled_reg(&state->stack[spi])) + /* Mark slots as STACK_MISC if they belonged to spilled ptr/dynptr/iter. 
*/ + if (is_stack_slot_special(&state->stack[spi])) for (i = 0; i < BPF_REG_SIZE; i++) scrub_spilled_slot(&state->stack[spi].slot_type[i]); @@ -6506,6 +6723,203 @@ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn return err; } +static u32 iter_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int spi) +{ + struct bpf_func_state *state = func(env, reg); + + return state->stack[spi].spilled_ptr.ref_obj_id; +} + +static bool is_iter_kfunc(struct bpf_kfunc_call_arg_meta *meta) +{ + return meta->kfunc_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY); +} + +static bool is_iter_new_kfunc(struct bpf_kfunc_call_arg_meta *meta) +{ + return meta->kfunc_flags & KF_ITER_NEW; +} + +static bool is_iter_next_kfunc(struct bpf_kfunc_call_arg_meta *meta) +{ + return meta->kfunc_flags & KF_ITER_NEXT; +} + +static bool is_iter_destroy_kfunc(struct bpf_kfunc_call_arg_meta *meta) +{ + return meta->kfunc_flags & KF_ITER_DESTROY; +} + +static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg) +{ + /* btf_check_iter_kfuncs() guarantees that first argument of any iter + * kfunc is iter state pointer + */ + return arg == 0 && is_iter_kfunc(meta); +} + +static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_idx, + struct bpf_kfunc_call_arg_meta *meta) +{ + struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; + const struct btf_type *t; + const struct btf_param *arg; + int spi, err, i, nr_slots; + u32 btf_id; + + /* btf_check_iter_kfuncs() ensures we don't need to validate anything here */ + arg = &btf_params(meta->func_proto)[0]; + t = btf_type_skip_modifiers(meta->btf, arg->type, NULL); /* PTR */ + t = btf_type_skip_modifiers(meta->btf, t->type, &btf_id); /* STRUCT */ + nr_slots = t->size / BPF_REG_SIZE; + + spi = iter_get_spi(env, reg, nr_slots); + if (spi < 0 && spi != -ERANGE) + return spi; + + meta->iter.spi = spi; + meta->iter.frameno = reg->frameno; + + if (is_iter_new_kfunc(meta)) { + /* bpf_iter__new() expects pointer to uninit iter state */ + if (!is_iter_reg_valid_uninit(env, reg, nr_slots)) { + verbose(env, "expected uninitialized iter_%s as arg #%d\n", + iter_type_str(meta->btf, btf_id), regno); + return -EINVAL; + } + + for (i = 0; i < nr_slots * 8; i += BPF_REG_SIZE) { + err = check_mem_access(env, insn_idx, regno, + i, BPF_DW, BPF_WRITE, -1, false); + if (err) + return err; + } + + err = mark_stack_slots_iter(env, reg, insn_idx, meta->btf, btf_id, nr_slots); + if (err) + return err; + } else { + /* iter_next() or iter_destroy() expect initialized iter state*/ + if (!is_iter_reg_valid_init(env, reg, meta->btf, btf_id, nr_slots)) { + verbose(env, "expected an initialized iter_%s as arg #%d\n", + iter_type_str(meta->btf, btf_id), regno); + return -EINVAL; + } + + err = mark_iter_read(env, reg, spi, nr_slots); + if (err) + return err; + + meta->ref_obj_id = iter_ref_obj_id(env, reg, spi); + + if (is_iter_destroy_kfunc(meta)) { + err = unmark_stack_slots_iter(env, reg, nr_slots); + if (err) + return err; + } + } + + return 0; +} + +/* process_iter_next_call() is called when verifier gets to iterator's next + * "method" (e.g., bpf_iter_num_next() for numbers iterator) call. We'll refer + * to it as just "iter_next()" in comments below. + * + * BPF verifier relies on a crucial contract for any iter_next() + * implementation: it should *eventually* return NULL, and once that happens + * it should keep returning NULL. 
That is, once iterator exhausts elements to + * iterate, it should never reset or spuriously return new elements. + * + * With the assumption of such contract, process_iter_next_call() simulates + * a fork in the verifier state to validate loop logic correctness and safety + * without having to simulate infinite amount of iterations. + * + * In current state, we first assume that iter_next() returned NULL and + * iterator state is set to DRAINED (BPF_ITER_STATE_DRAINED). In such + * conditions we should not form an infinite loop and should eventually reach + * exit. + * + * Besides that, we also fork current state and enqueue it for later + * verification. In a forked state we keep iterator state as ACTIVE + * (BPF_ITER_STATE_ACTIVE) and assume non-NULL return from iter_next(). We + * also bump iteration depth to prevent erroneous infinite loop detection + * later on (see iter_active_depths_differ() comment for details). In this + * state we assume that we'll eventually loop back to another iter_next() + * calls (it could be in exactly same location or in some other instruction, + * it doesn't matter, we don't make any unnecessary assumptions about this, + * everything revolves around iterator state in a stack slot, not which + * instruction is calling iter_next()). When that happens, we either will come + * to iter_next() with equivalent state and can conclude that next iteration + * will proceed in exactly the same way as we just verified, so it's safe to + * assume that loop converges. If not, we'll go on another iteration + * simulation with a different input state, until all possible starting states + * are validated or we reach maximum number of instructions limit. + * + * This way, we will either exhaustively discover all possible input states + * that iterator loop can start with and eventually will converge, or we'll + * effectively regress into bounded loop simulation logic and either reach + * maximum number of instructions if loop is not provably convergent, or there + * is some statically known limit on number of iterations (e.g., if there is + * an explicit `if n > 100 then break;` statement somewhere in the loop). + * + * One very subtle but very important aspect is that we *always* simulate NULL + * condition first (as the current state) before we simulate non-NULL case. + * This has to do with intricacies of scalar precision tracking. By simulating + * "exit condition" of iter_next() returning NULL first, we make sure all the + * relevant precision marks *that will be set **after** we exit iterator loop* + * are propagated backwards to common parent state of NULL and non-NULL + * branches. Thanks to that, state equivalence checks done later in forked + * state, when reaching iter_next() for ACTIVE iterator, can assume that + * precision marks are finalized and won't change. Because simulating another + * ACTIVE iterator iteration won't change them (because given same input + * states we'll end up with exactly same output states which we are currently + * comparing; and verification after the loop already propagated back what + * needs to be **additionally** tracked as precise). It's subtle, grok + * precision tracking for more intuitive understanding. 
+ */ +static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx, + struct bpf_kfunc_call_arg_meta *meta) +{ + struct bpf_verifier_state *cur_st = env->cur_state, *queued_st; + struct bpf_func_state *cur_fr = cur_st->frame[cur_st->curframe], *queued_fr; + struct bpf_reg_state *cur_iter, *queued_iter; + int iter_frameno = meta->iter.frameno; + int iter_spi = meta->iter.spi; + + BTF_TYPE_EMIT(struct bpf_iter); + + cur_iter = &env->cur_state->frame[iter_frameno]->stack[iter_spi].spilled_ptr; + + if (cur_iter->iter.state != BPF_ITER_STATE_ACTIVE && + cur_iter->iter.state != BPF_ITER_STATE_DRAINED) { + verbose(env, "verifier internal error: unexpected iterator state %d (%s)\n", + cur_iter->iter.state, iter_state_str(cur_iter->iter.state)); + return -EFAULT; + } + + if (cur_iter->iter.state == BPF_ITER_STATE_ACTIVE) { + /* branch out active iter state */ + queued_st = push_stack(env, insn_idx + 1, insn_idx, false); + if (!queued_st) + return -ENOMEM; + + queued_iter = &queued_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr; + queued_iter->iter.state = BPF_ITER_STATE_ACTIVE; + queued_iter->iter.depth++; + + queued_fr = queued_st->frame[queued_st->curframe]; + mark_ptr_not_null_reg(&queued_fr->regs[BPF_REG_0]); + } + + /* switch to DRAINED state, but keep the depth unchanged */ + /* mark current iter state as drained and assume returned NULL */ + cur_iter->iter.state = BPF_ITER_STATE_DRAINED; + __mark_reg_const_zero(&cur_fr->regs[BPF_REG_0]); + + return 0; +} + static bool arg_type_is_mem_size(enum bpf_arg_type type) { return type == ARG_CONST_SIZE || @@ -9099,6 +9513,7 @@ enum kfunc_ptr_arg_type { KF_ARG_PTR_TO_ALLOC_BTF_ID, /* Allocated object */ KF_ARG_PTR_TO_KPTR, /* PTR_TO_KPTR but type specific */ KF_ARG_PTR_TO_DYNPTR, + KF_ARG_PTR_TO_ITER, KF_ARG_PTR_TO_LIST_HEAD, KF_ARG_PTR_TO_LIST_NODE, KF_ARG_PTR_TO_BTF_ID, /* Also covers reg2btf_ids conversions */ @@ -9220,6 +9635,9 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, if (is_kfunc_arg_dynptr(meta->btf, &args[argno])) return KF_ARG_PTR_TO_DYNPTR; + if (is_kfunc_arg_iter(meta, argno)) + return KF_ARG_PTR_TO_ITER; + if (is_kfunc_arg_list_head(meta->btf, &args[argno])) return KF_ARG_PTR_TO_LIST_HEAD; @@ -9848,6 +10266,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ break; case KF_ARG_PTR_TO_KPTR: case KF_ARG_PTR_TO_DYNPTR: + case KF_ARG_PTR_TO_ITER: case KF_ARG_PTR_TO_LIST_HEAD: case KF_ARG_PTR_TO_LIST_NODE: case KF_ARG_PTR_TO_RB_ROOT: @@ -9944,6 +10363,11 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ break; } + case KF_ARG_PTR_TO_ITER: + ret = process_iter_arg(env, regno, insn_idx, meta); + if (ret < 0) + return ret; + break; case KF_ARG_PTR_TO_LIST_HEAD: if (reg->type != PTR_TO_MAP_VALUE && reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { @@ -10148,6 +10572,8 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, desc_btf = meta.btf; insn_aux = &env->insn_aux_data[insn_idx]; + insn_aux->is_iter_next = is_iter_next_kfunc(&meta); + if (is_kfunc_destructive(&meta) && !capable(CAP_SYS_BOOT)) { verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capability\n"); return -EACCES; @@ -10436,6 +10862,12 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, mark_btf_func_reg_size(env, regno, t->size); } + if (is_iter_next_kfunc(&meta)) { + err = process_iter_next_call(env, insn_idx, &meta); + if (err) + return err; + } + return 0; } @@ -13548,6 +13980,13 @@ static int visit_insn(int t, 
struct bpf_verifier_env *env) * async state will be pushed for further exploration. */ mark_prune_point(env, t); + if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { + struct bpf_kfunc_call_arg_meta meta; + + ret = fetch_kfunc_meta(env, insn, &meta, NULL); + if (ret == 0 && is_iter_next_kfunc(&meta)) + mark_prune_point(env, t); + } return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL); case BPF_JA: @@ -14301,6 +14740,8 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, * didn't use them */ for (i = 0; i < old->allocated_stack; i++) { + struct bpf_reg_state *old_reg, *cur_reg; + spi = i / BPF_REG_SIZE; if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) { @@ -14357,9 +14798,6 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, return false; break; case STACK_DYNPTR: - { - const struct bpf_reg_state *old_reg, *cur_reg; - old_reg = &old->stack[spi].spilled_ptr; cur_reg = &cur->stack[spi].spilled_ptr; if (old_reg->dynptr.type != cur_reg->dynptr.type || @@ -14367,7 +14805,22 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap)) return false; break; - } + case STACK_ITER: + old_reg = &old->stack[spi].spilled_ptr; + cur_reg = &cur->stack[spi].spilled_ptr; + /* iter.depth is not compared between states as it + * doesn't matter for correctness and would otherwise + * prevent convergence; we maintain it only to prevent + * infinite loop check triggering, see + * iter_active_depths_differ() + */ + if (old_reg->iter.btf != cur_reg->iter.btf || + old_reg->iter.btf_id != cur_reg->iter.btf_id || + old_reg->iter.state != cur_reg->iter.state || + /* ignore {old_reg,cur_reg}->iter.depth, see above */ + !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap)) + return false; + break; case STACK_MISC: case STACK_ZERO: case STACK_INVALID: @@ -14626,6 +15079,92 @@ static bool states_maybe_looping(struct bpf_verifier_state *old, return true; } +static bool is_iter_next_insn(struct bpf_verifier_env *env, int insn_idx) +{ + return env->insn_aux_data[insn_idx].is_iter_next; +} + +/* is_state_visited() handles iter_next() (see process_iter_next_call() for + * terminology) calls specially: as opposed to bounded BPF loops, it *expects* + * states to match, which otherwise would look like an infinite loop. So while + * iter_next() calls are taken care of, we still need to be careful and + * prevent erroneous and too eager declaration of "ininite loop", when + * iterators are involved. + * + * Here's a situation in pseudo-BPF assembly form: + * + * 0: again: ; set up iter_next() call args + * 1: r1 = &it ; + * 2: call bpf_iter_num_next ; this is iter_next() call + * 3: if r0 == 0 goto done + * 4: ... something useful here ... + * 5: goto again ; another iteration + * 6: done: + * 7: r1 = &it + * 8: call bpf_iter_num_destroy ; clean up iter state + * 9: exit + * + * This is a typical loop. Let's assume that we have a prune point at 1:, + * before we get to `call bpf_iter_num_next` (e.g., because of that `goto + * again`, assuming other heuristics don't get in a way). + * + * When we first time come to 1:, let's say we have some state X. We proceed + * to 2:, fork states, enqueue ACTIVE, validate NULL case successfully, exit. + * Now we come back to validate that forked ACTIVE state. We proceed through + * 3-5, come to goto, jump to 1:. Let's assume our state didn't change, so we + * are converging. 
But the problem is that we don't know that yet, as this + * convergence has to happen at iter_next() call site only. So if nothing is + * done, at 1: the verifier will use bounded loop logic and declare infinite + * looping (and would be *technically* correct, if not for iterator's + * "eventual sticky NULL" contract, see process_iter_next_call()). But we + * don't want that. So what we do in process_iter_next_call(), when we go on + * another ACTIVE iteration, is bump slot->iter.depth to mark that it's + * a different iteration. So when we suspect an infinite loop, we additionally + * check if any of the *ACTIVE* iterator states' depths differ. If yes, we + * pretend we are not looping and wait for next iter_next() call. + * + * This only applies to ACTIVE state. In DRAINED state we don't expect to + * loop, because that would actually mean an infinite loop, as DRAINED state is + * "sticky", and so we'll keep returning into the same instruction with the + * same state (at least in one of the possible code paths). + * + * This approach allows us to keep the infinite loop heuristic even in the face + * of an active iterator. E.g., the C snippet below is, and will be detected as, + * infinitely looping: + * + * struct bpf_iter_num it; + * int *p, x; + * + * bpf_iter_num_new(&it, 0, 10); + * while ((p = bpf_iter_num_next(&it))) { + * x = *p; + * while (x--) {} // <<-- infinite loop here + * } + * + */ +static bool iter_active_depths_differ(struct bpf_verifier_state *old, struct bpf_verifier_state *cur) +{ + struct bpf_reg_state *slot, *cur_slot; + struct bpf_func_state *state; + int i, fr; + + for (fr = old->curframe; fr >= 0; fr--) { + state = old->frame[fr]; + for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { + if (state->stack[i].slot_type[0] != STACK_ITER) + continue; + + slot = &state->stack[i].spilled_ptr; + if (slot->iter.state != BPF_ITER_STATE_ACTIVE) + continue; + + cur_slot = &cur->frame[fr]->stack[i].spilled_ptr; + if (cur_slot->iter.depth != slot->iter.depth) + return true; + } + } + return false; +} static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) { @@ -14673,8 +15212,46 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) * Since the verifier still needs to catch infinite loops * inside async callbacks. */ - } else if (states_maybe_looping(&sl->state, cur) && - states_equal(env, &sl->state, cur)) { + goto skip_inf_loop_check; + } + /* BPF open-coded iterators loop detection is special. + * states_maybe_looping() logic is too simplistic in detecting + * states that *might* be equivalent, because it doesn't know + * about ID remapping, so don't even perform it. + * See process_iter_next_call() and iter_active_depths_differ() + * for overview of the logic. When current and one of parent + * states are detected as equivalent, it's a good thing: we prove + * convergence and can stop simulating further iterations. + * It's safe to assume that iterator loop will finish, taking into + * account iter_next() contract of eventually returning + * sticky NULL result.
+ */ + if (is_iter_next_insn(env, insn_idx)) { + if (states_equal(env, &sl->state, cur)) { + struct bpf_func_state *cur_frame; + struct bpf_reg_state *iter_state, *iter_reg; + int spi; + + cur_frame = cur->frame[cur->curframe]; + /* btf_check_iter_kfuncs() enforces that + * iter state pointer is always the first arg + */ + iter_reg = &cur_frame->regs[BPF_REG_1]; + /* current state is valid due to states_equal(), + * so we can assume valid iter and reg state, + * no need for extra (re-)validations + */ + spi = __get_spi(iter_reg->off + iter_reg->var_off.value); + iter_state = &func(env, iter_reg)->stack[spi].spilled_ptr; + if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) + goto hit; + } + goto skip_inf_loop_check; + } + /* attempt to detect infinite loop to avoid unnecessary doomed work */ + if (states_maybe_looping(&sl->state, cur) && + states_equal(env, &sl->state, cur) && + !iter_active_depths_differ(&sl->state, cur)) { verbose_linfo(env, insn_idx, "; "); verbose(env, "infinite loop detected at insn %d\n", insn_idx); return -EINVAL; @@ -14691,6 +15268,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) * This threshold shouldn't be too high either, since states * at the end of the loop are likely to be useful in pruning. */ +skip_inf_loop_check: if (!env->test_state_freq && env->jmps_processed - env->prev_jmps_processed < 20 && env->insn_processed - env->prev_insn_processed < 100) @@ -14698,6 +15276,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) goto miss; } if (states_equal(env, &sl->state, cur)) { +hit: sl->hit_cnt++; /* reached equivalent register/stack state, * prune the search. -- GitLab From 6018e1f407cccf39b804d1f75ad4de7be4e6cc45 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 8 Mar 2023 10:41:17 -0800 Subject: [PATCH 0252/2078] bpf: implement numbers iterator Implement the first open-coded iterator type over a range of integers. Its public API consists of: - bpf_iter_num_new() constructor, which accepts [start, end) range (that is, start is inclusive, end is exclusive). - bpf_iter_num_next(), which keeps returning a read-only pointer to int until the range is exhausted, at which point NULL is returned. Any further bpf_iter_num_next() calls after that will keep returning NULL. - bpf_iter_num_destroy() destructor, which needs to be called at some point to clean up iterator state. The BPF verifier enforces that the iterator destructor is called before the BPF program exits. Note that `start = end = X` is a valid combination to set up an empty iterator. bpf_iter_num_new() will return 0 (success) for any such combination. If bpf_iter_num_new() detects an invalid combination of input arguments, it returns an error and resets the iterator state to, effectively, an empty iterator, so any subsequent call to bpf_iter_num_next() will keep returning NULL. The BPF verifier has no knowledge that the returned integers are in the [start, end) value range, as both `start` and `end` are runtime values, not statically known or enforced. While the implementation is pretty trivial, some care needs to be taken to avoid overflows and underflows. Subsequent selftests will validate correctness of the [start, end) semantics, especially around extremes (INT_MIN and INT_MAX). Similarly to bpf_loop(), we enforce that no more than BPF_MAX_LOOPS iterations can be specified.
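As a rough usage sketch (mirroring the selftests added later in this series; the surrounding BPF program context is assumed rather than shown here):

    struct bpf_iter_num it;
    int *v, sum = 0;

    bpf_iter_num_new(&it, 0, 10);          /* [0, 10): start inclusive, end exclusive */
    while ((v = bpf_iter_num_next(&it)))
            sum += *v;                     /* visits 0, 1, ..., 9 */
    bpf_iter_num_destroy(&it);             /* verifier requires this before program exit */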
bpf_iter_num_{new,next,destroy}() is a logical evolution from bounded BPF loops and bpf_loop() helper and is the basis for implementing ergonomic BPF loops with no statically known or verified bounds. Subsequent patches implement bpf_for() macro, demonstrating how this can be wrapped into something that works and feels like a normal for() loop in C language. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230308184121.1165081-5-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 8 +++- include/uapi/linux/bpf.h | 8 ++++ kernel/bpf/bpf_iter.c | 70 ++++++++++++++++++++++++++++++++++ kernel/bpf/helpers.c | 3 ++ tools/include/uapi/linux/bpf.h | 8 ++++ 5 files changed, 95 insertions(+), 2 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6792a7940e1e6..e64ff1e89fb2a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1617,8 +1617,12 @@ struct bpf_array { #define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */ #define MAX_TAIL_CALL_CNT 33 -/* Maximum number of loops for bpf_loop */ -#define BPF_MAX_LOOPS BIT(23) +/* Maximum number of loops for bpf_loop and bpf_iter_num. + * It's enum to expose it (and thus make it discoverable) through BTF. + */ +enum { + BPF_MAX_LOOPS = 8 * 1024 * 1024, +}; #define BPF_F_ACCESS_MASK (BPF_F_RDONLY | \ BPF_F_RDONLY_PROG | \ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 976b194eb7754..4abddb668a107 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -7112,4 +7112,12 @@ enum { BPF_F_TIMER_ABS = (1ULL << 0), }; +/* BPF numbers iterator state */ +struct bpf_iter_num { + /* opaque iterator state; having __u64 here allows to preserve correct + * alignment requirements in vmlinux.h, generated from BTF + */ + __u64 __opaque[1]; +} __attribute__((aligned(8))); + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index 5dc307bdeaebc..96856f130cbff 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -776,3 +776,73 @@ const struct bpf_func_proto bpf_loop_proto = { .arg3_type = ARG_PTR_TO_STACK_OR_NULL, .arg4_type = ARG_ANYTHING, }; + +struct bpf_iter_num_kern { + int cur; /* current value, inclusive */ + int end; /* final value, exclusive */ +} __aligned(8); + +__diag_push(); +__diag_ignore_all("-Wmissing-prototypes", + "Global functions as their definitions will be in vmlinux BTF"); + +__bpf_kfunc int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) +{ + struct bpf_iter_num_kern *s = (void *)it; + + BUILD_BUG_ON(sizeof(struct bpf_iter_num_kern) != sizeof(struct bpf_iter_num)); + BUILD_BUG_ON(__alignof__(struct bpf_iter_num_kern) != __alignof__(struct bpf_iter_num)); + + BTF_TYPE_EMIT(struct btf_iter_num); + + /* start == end is legit, it's an empty range and we'll just get NULL + * on first (and any subsequent) bpf_iter_num_next() call + */ + if (start > end) { + s->cur = s->end = 0; + return -EINVAL; + } + + /* avoid overflows, e.g., if start == INT_MIN and end == INT_MAX */ + if ((s64)end - (s64)start > BPF_MAX_LOOPS) { + s->cur = s->end = 0; + return -E2BIG; + } + + /* user will call bpf_iter_num_next() first, + * which will set s->cur to exactly start value; + * underflow shouldn't matter + */ + s->cur = start - 1; + s->end = end; + + return 0; +} + +__bpf_kfunc int *bpf_iter_num_next(struct bpf_iter_num* it) +{ + struct bpf_iter_num_kern *s = (void *)it; + + /* check failed initialization or if we are done (same behavior); + * need to be careful about overflow, so convert to 
s64 for checks, + * e.g., if s->cur == s->end == INT_MAX, we can't just do + * s->cur + 1 >= s->end + */ + if ((s64)(s->cur + 1) >= s->end) { + s->cur = s->end = 0; + return NULL; + } + + s->cur++; + + return &s->cur; +} + +__bpf_kfunc void bpf_iter_num_destroy(struct bpf_iter_num *it) +{ + struct bpf_iter_num_kern *s = (void *)it; + + s->cur = s->end = 0; +} + +__diag_pop(); diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 637ac4e92e756..f9b7eeedce08b 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2411,6 +2411,9 @@ BTF_ID_FLAGS(func, bpf_rcu_read_lock) BTF_ID_FLAGS(func, bpf_rcu_read_unlock) BTF_ID_FLAGS(func, bpf_dynptr_slice, KF_RET_NULL) BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_iter_num_new, KF_ITER_NEW) +BTF_ID_FLAGS(func, bpf_iter_num_next, KF_ITER_NEXT | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY) BTF_SET8_END(common_btf_ids) static const struct btf_kfunc_id_set common_kfunc_set = { diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 976b194eb7754..4abddb668a107 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -7112,4 +7112,12 @@ enum { BPF_F_TIMER_ABS = (1ULL << 0), }; +/* BPF numbers iterator state */ +struct bpf_iter_num { + /* opaque iterator state; having __u64 here allows to preserve correct + * alignment requirements in vmlinux.h, generated from BTF + */ + __u64 __opaque[1]; +} __attribute__((aligned(8))); + #endif /* _UAPI__LINUX_BPF_H__ */ -- GitLab From 8c2b5e90505e474f36ecc3b7f3f8298b59d72e91 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 8 Mar 2023 10:41:18 -0800 Subject: [PATCH 0253/2078] selftests/bpf: add bpf_for_each(), bpf_for(), and bpf_repeat() macros Add bpf_for_each(), bpf_for(), and bpf_repeat() macros that make writing open-coded iterator-based loops much more convenient and natural. These macros utilize the cleanup attribute to ensure proper destruction of the iterator and, thanks to that, manage to provide ergonomics very close to C's for() construct. A typical loop would look like: int i; int arr[N]; bpf_for(i, 0, N) { /* verifier will know that i >= 0 && i < N, so could be used to * directly access array elements with no extra checks */ arr[i] = i; } bpf_repeat() is very similar, but it doesn't expose the iteration number and is meant as a simple "repeat action N times" loop: bpf_repeat(N) { /* whatever, N times */ } Note that `break` and `continue` statements inside the {} block work as expected. bpf_for_each() is a generalization over any kind of BPF open-coded iterator, allowing a for-each-like approach instead of calling the low-level bpf_iter_<type>_{new,next,destroy}() APIs explicitly. E.g.: struct cgroup *cg; bpf_for_each(cgroup, cg, some, input, args) { /* do something with each cg */ } would call (not-yet-implemented) bpf_iter_cgroup_{new,next,destroy}() functions to form a loop over cgroups, where `some, input, args` are passed verbatim into the constructor as bpf_iter_cgroup_new(&it, some, input, args). As a first demonstration, add a pyperf variant based on the bpf_for() loop. Also clean up a few tests that either included the bpf_misc.h header unnecessarily from user space, which is unsupported, or included it before any common types were defined (thus potentially leading to unnecessary compilation warnings).
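With the numbers iterator from the previous patch being the only iterator kind available so far, a concrete, working counterpart of the hypothetical cgroup example above (the same pattern is exercised by the selftests later in this series) looks roughly like:

    int *v;

    bpf_for_each(num, v, 5, 10) {
            /* v is an `int *` pointing to the current value: 5, 6, ..., 9 */
            bpf_printk("v=%d", *v);
    }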
Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230308184121.1165081-6-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../bpf/prog_tests/bpf_verif_scale.c | 6 ++ .../bpf/prog_tests/uprobe_autoattach.c | 1 - tools/testing/selftests/bpf/progs/bpf_misc.h | 99 +++++++++++++++++++ tools/testing/selftests/bpf/progs/lsm.c | 4 +- tools/testing/selftests/bpf/progs/pyperf.h | 14 ++- .../selftests/bpf/progs/pyperf600_iter.c | 7 ++ .../selftests/bpf/progs/pyperf600_nounroll.c | 3 - 7 files changed, 124 insertions(+), 10 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/pyperf600_iter.c diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c index 5ca252823294b..731c343897d8f 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c @@ -144,6 +144,12 @@ void test_verif_scale_pyperf600_nounroll() scale_test("pyperf600_nounroll.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); } +void test_verif_scale_pyperf600_iter() +{ + /* open-coded BPF iterator version */ + scale_test("pyperf600_iter.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + void test_verif_scale_loop1() { scale_test("loop1.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c b/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c index 6558c857e6206..d5b3377aa33c7 100644 --- a/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c @@ -3,7 +3,6 @@ #include #include "test_uprobe_autoattach.skel.h" -#include "progs/bpf_misc.h" /* uprobe attach point */ static noinline int autoattach_trigger_func(int arg1, int arg2, int arg3, diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index f704885aa5342..597688a188ae6 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -75,5 +75,104 @@ #define FUNC_REG_ARG_CNT 5 #endif +struct bpf_iter_num; + +extern int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) __ksym; +extern int *bpf_iter_num_next(struct bpf_iter_num *it) __ksym; +extern void bpf_iter_num_destroy(struct bpf_iter_num *it) __ksym; + +#ifndef bpf_for_each +/* bpf_for_each(iter_type, cur_elem, args...) provides generic construct for + * using BPF open-coded iterators without having to write mundane explicit + * low-level loop logic. Instead, it provides for()-like generic construct + * that can be used pretty naturally. E.g., for some hypothetical cgroup + * iterator, you'd write: + * + * struct cgroup *cg, *parent_cg = <...>; + * + * bpf_for_each(cgroup, cg, parent_cg, CG_ITER_CHILDREN) { + * bpf_printk("Child cgroup id = %d", cg->cgroup_id); + * if (cg->cgroup_id == 123) + * break; + * } + * + * I.e., it looks almost like high-level for each loop in other languages, + * supports continue/break, and is verifiable by BPF verifier. + * + * For iterating integers, the difference betwen bpf_for_each(num, i, N, M) + * and bpf_for(i, N, M) is in that bpf_for() provides additional proof to + * verifier that i is in [N, M) range, and in bpf_for_each() case i is `int + * *`, not just `int`. So for integers bpf_for() is more convenient. + * + * Note: this macro relies on C99 feature of allowing to declare variables + * inside for() loop, bound to for() loop lifetime. 
It also utilizes GCC + * extension: __attribute__((cleanup())), supported by both GCC and + * Clang. + */ +#define bpf_for_each(type, cur, args...) for ( \ + /* initialize and define destructor */ \ + struct bpf_iter_##type ___it __attribute__((aligned(8), /* enforce, just in case */, \ + cleanup(bpf_iter_##type##_destroy))), \ + /* ___p pointer is just to call bpf_iter_##type##_new() *once* to init ___it */ \ + *___p = (bpf_iter_##type##_new(&___it, ##args), \ + /* this is a workaround for Clang bug: it currently doesn't emit BTF */ \ + /* for bpf_iter_##type##_destroy() when used from cleanup() attribute */ \ + (void)bpf_iter_##type##_destroy, (void *)0); \ + /* iteration and termination check */ \ + (((cur) = bpf_iter_##type##_next(&___it))); \ +) +#endif /* bpf_for_each */ + +#ifndef bpf_for +/* bpf_for(i, start, end) implements a for()-like looping construct that sets + * provided integer variable *i* to values starting from *start* through, + * but not including, *end*. It also proves to BPF verifier that *i* belongs + * to range [start, end), so this can be used for accessing arrays without + * extra checks. + * + * Note: *start* and *end* are assumed to be expressions with no side effects + * and whose values do not change throughout bpf_for() loop execution. They do + * not have to be statically known or constant, though. + * + * Note: similarly to bpf_for_each(), it relies on C99 feature of declaring for() + * loop bound variables and cleanup attribute, supported by GCC and Clang. + */ +#define bpf_for(i, start, end) for ( \ + /* initialize and define destructor */ \ + struct bpf_iter_num ___it __attribute__((aligned(8), /* enforce, just in case */ \ + cleanup(bpf_iter_num_destroy))), \ + /* ___p pointer is necessary to call bpf_iter_num_new() *once* to init ___it */ \ + *___p = (bpf_iter_num_new(&___it, (start), (end)), \ + /* this is a workaround for Clang bug: it currently doesn't emit BTF */ \ + /* for bpf_iter_num_destroy() when used from cleanup() attribute */ \ + (void)bpf_iter_num_destroy, (void *)0); \ + ({ \ + /* iteration step */ \ + int *___t = bpf_iter_num_next(&___it); \ + /* termination and bounds check */ \ + (___t && ((i) = *___t, (i) >= (start) && (i) < (end))); \ + }); \ +) +#endif /* bpf_for */ + +#ifndef bpf_repeat +/* bpf_repeat(N) performs N iterations without exposing iteration number + * + * Note: similarly to bpf_for_each(), it relies on C99 feature of declaring for() + * loop bound variables and cleanup attribute, supported by GCC and Clang. + */ +#define bpf_repeat(N) for ( \ + /* initialize and define destructor */ \ + struct bpf_iter_num ___it __attribute__((aligned(8), /* enforce, just in case */ \ + cleanup(bpf_iter_num_destroy))), \ + /* ___p pointer is necessary to call bpf_iter_num_new() *once* to init ___it */ \ + *___p = (bpf_iter_num_new(&___it, 0, (N)), \ + /* this is a workaround for Clang bug: it currently doesn't emit BTF */ \ + /* for bpf_iter_num_destroy() when used from cleanup() attribute */ \ + (void)bpf_iter_num_destroy, (void *)0); \ + bpf_iter_num_next(&___it); \ + /* nothing here */ \ +) +#endif /* bpf_repeat */ #endif diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c index dc93887ed34c7..fadfdd98707c4 100644 --- a/tools/testing/selftests/bpf/progs/lsm.c +++ b/tools/testing/selftests/bpf/progs/lsm.c @@ -4,12 +4,12 @@ * Copyright 2020 Google LLC. 
*/ -#include "bpf_misc.h" #include "vmlinux.h" +#include #include #include #include -#include +#include "bpf_misc.h" struct { __uint(type, BPF_MAP_TYPE_ARRAY); diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h index 6c7b1fb268d63..f2e7a31c8d751 100644 --- a/tools/testing/selftests/bpf/progs/pyperf.h +++ b/tools/testing/selftests/bpf/progs/pyperf.h @@ -7,6 +7,7 @@ #include #include #include +#include "bpf_misc.h" #define FUNCTION_NAME_LEN 64 #define FILE_NAME_LEN 128 @@ -294,17 +295,22 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx) if (ctx.done) return 0; #else -#ifdef NO_UNROLL +#if defined(USE_ITER) +/* no for loop, no unrolling */ +#elif defined(NO_UNROLL) #pragma clang loop unroll(disable) -#else -#ifdef UNROLL_COUNT +#elif defined(UNROLL_COUNT) #pragma clang loop unroll_count(UNROLL_COUNT) #else #pragma clang loop unroll(full) -#endif #endif /* NO_UNROLL */ /* Unwind python stack */ +#ifdef USE_ITER + int i; + bpf_for(i, 0, STACK_MAX_LEN) { +#else /* !USE_ITER */ for (int i = 0; i < STACK_MAX_LEN; ++i) { +#endif if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) { int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu; int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, &sym); diff --git a/tools/testing/selftests/bpf/progs/pyperf600_iter.c b/tools/testing/selftests/bpf/progs/pyperf600_iter.c new file mode 100644 index 0000000000000..d62e1b200c30e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/pyperf600_iter.c @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2023 Meta Platforms, Inc. and affiliates. +#define STACK_MAX_LEN 600 +#define SUBPROGS +#define NO_UNROLL +#define USE_ITER +#include "pyperf.h" diff --git a/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c b/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c index 6beff7502f4d7..520b58c4f8db6 100644 --- a/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c +++ b/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c @@ -2,7 +2,4 @@ // Copyright (c) 2019 Facebook #define STACK_MAX_LEN 600 #define NO_UNROLL -/* clang will not unroll at all. - * Total program size is around 2k insns - */ #include "pyperf.h" -- GitLab From 57400dcce6c2cf3985120c4ee28b37a1f4238dbb Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 8 Mar 2023 10:41:19 -0800 Subject: [PATCH 0254/2078] selftests/bpf: add iterators tests Add various tests for open-coded iterators. Some of them excercise various possible coding patterns in C, some go down to low-level assembly for more control over various conditions, especially invalid ones. We also make use of bpf_for(), bpf_for_each(), bpf_repeat() macros in some of these tests. 
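The expected verifier verdict for each program is encoded with the __success/__failure/__msg() annotations from bpf_misc.h and checked through the RUN_TESTS() runner; schematically, the pattern is (the test body below is a made-up illustration, not one of the added tests):

    SEC("?raw_tp")
    __failure __msg("Unreleased reference")
    int iter_leak_example(void *ctx)
    {
            struct bpf_iter_num it;

            bpf_iter_num_new(&it, 0, 10);
            /* no bpf_iter_num_destroy() -- the verifier must reject this */
            return 0;
    }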
Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230308184121.1165081-7-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/iters.c | 15 + tools/testing/selftests/bpf/progs/bpf_misc.h | 1 + tools/testing/selftests/bpf/progs/iters.c | 720 ++++++++++++++++++ .../selftests/bpf/progs/iters_looping.c | 163 ++++ .../selftests/bpf/progs/iters_state_safety.c | 426 +++++++++++ 5 files changed, 1325 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/iters.c create mode 100644 tools/testing/selftests/bpf/progs/iters.c create mode 100644 tools/testing/selftests/bpf/progs/iters_looping.c create mode 100644 tools/testing/selftests/bpf/progs/iters_state_safety.c diff --git a/tools/testing/selftests/bpf/prog_tests/iters.c b/tools/testing/selftests/bpf/prog_tests/iters.c new file mode 100644 index 0000000000000..414fb8d82145a --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/iters.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include + +#include "iters.skel.h" +#include "iters_state_safety.skel.h" +#include "iters_looping.skel.h" + +void test_iters(void) +{ + RUN_TESTS(iters_state_safety); + RUN_TESTS(iters_looping); + RUN_TESTS(iters); +} diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index 597688a188ae6..43b154a639e78 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -36,6 +36,7 @@ #define __clobber_common "r0", "r1", "r2", "r3", "r4", "r5", "memory" #define __imm(name) [name]"i"(name) #define __imm_addr(name) [name]"i"(&name) +#define __imm_ptr(name) [name]"p"(&name) #if defined(__TARGET_ARCH_x86) #define SYSCALL_WRAPPER 1 diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c new file mode 100644 index 0000000000000..84e5dc10243ca --- /dev/null +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -0,0 +1,720 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
*/ + +#include +#include +#include +#include "bpf_misc.h" + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +static volatile int zero = 0; + +int my_pid; +int arr[256]; +int small_arr[16] SEC(".data.small_arr"); + +#ifdef REAL_TEST +#define MY_PID_GUARD() if (my_pid != (bpf_get_current_pid_tgid() >> 32)) return 0 +#else +#define MY_PID_GUARD() ({ }) +#endif + +SEC("?raw_tp") +__failure __msg("math between map_value pointer and register with unbounded min value is not allowed") +int iter_err_unsafe_c_loop(const void *ctx) +{ + struct bpf_iter_num it; + int *v, i = zero; /* obscure initial value of i */ + + MY_PID_GUARD(); + + bpf_iter_num_new(&it, 0, 1000); + while ((v = bpf_iter_num_next(&it))) { + i++; + } + bpf_iter_num_destroy(&it); + + small_arr[i] = 123; /* invalid */ + + return 0; +} + +SEC("?raw_tp") +__failure __msg("unbounded memory access") +int iter_err_unsafe_asm_loop(const void *ctx) +{ + struct bpf_iter_num it; + int *v, i = 0; + + MY_PID_GUARD(); + + asm volatile ( + "r6 = %[zero];" /* iteration counter */ + "r1 = %[it];" /* iterator state */ + "r2 = 0;" + "r3 = 1000;" + "r4 = 1;" + "call %[bpf_iter_num_new];" + "loop:" + "r1 = %[it];" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto out;" + "r6 += 1;" + "goto loop;" + "out:" + "r1 = %[it];" + "call %[bpf_iter_num_destroy];" + "r1 = %[small_arr];" + "r2 = r6;" + "r2 <<= 2;" + "r1 += r2;" + "*(u32 *)(r1 + 0) = r6;" /* invalid */ + : + : [it]"r"(&it), + [small_arr]"p"(small_arr), + [zero]"p"(zero), + __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy) + : __clobber_common, "r6" + ); + + return 0; +} + +SEC("raw_tp") +__success +int iter_while_loop(const void *ctx) +{ + struct bpf_iter_num it; + int *v, i; + + MY_PID_GUARD(); + + bpf_iter_num_new(&it, 0, 3); + while ((v = bpf_iter_num_next(&it))) { + bpf_printk("ITER_BASIC: E1 VAL: v=%d", *v); + } + bpf_iter_num_destroy(&it); + + return 0; +} + +SEC("raw_tp") +__success +int iter_while_loop_auto_cleanup(const void *ctx) +{ + __attribute__((cleanup(bpf_iter_num_destroy))) struct bpf_iter_num it; + int *v, i; + + MY_PID_GUARD(); + + bpf_iter_num_new(&it, 0, 3); + while ((v = bpf_iter_num_next(&it))) { + bpf_printk("ITER_BASIC: E1 VAL: v=%d", *v); + } + /* (!) no explicit bpf_iter_num_destroy() */ + + return 0; +} + +SEC("raw_tp") +__success +int iter_for_loop(const void *ctx) +{ + struct bpf_iter_num it; + int *v, i; + + MY_PID_GUARD(); + + bpf_iter_num_new(&it, 5, 10); + for (v = bpf_iter_num_next(&it); v; v = bpf_iter_num_next(&it)) { + bpf_printk("ITER_BASIC: E2 VAL: v=%d", *v); + } + bpf_iter_num_destroy(&it); + + return 0; +} + +SEC("raw_tp") +__success +int iter_bpf_for_each_macro(const void *ctx) +{ + int *v; + + MY_PID_GUARD(); + + bpf_for_each(num, v, 5, 10) { + bpf_printk("ITER_BASIC: E2 VAL: v=%d", *v); + } + + return 0; +} + +SEC("raw_tp") +__success +int iter_bpf_for_macro(const void *ctx) +{ + int i; + + MY_PID_GUARD(); + + bpf_for(i, 5, 10) { + bpf_printk("ITER_BASIC: E2 VAL: v=%d", i); + } + + return 0; +} + +SEC("raw_tp") +__success +int iter_pragma_unroll_loop(const void *ctx) +{ + struct bpf_iter_num it; + int *v, i; + + MY_PID_GUARD(); + + bpf_iter_num_new(&it, 0, 2); +#pragma nounroll + for (i = 0; i < 3; i++) { + v = bpf_iter_num_next(&it); + bpf_printk("ITER_BASIC: E3 VAL: i=%d v=%d", i, v ? 
*v : -1); + } + bpf_iter_num_destroy(&it); + + return 0; +} + +SEC("raw_tp") +__success +int iter_manual_unroll_loop(const void *ctx) +{ + struct bpf_iter_num it; + int *v, i; + + MY_PID_GUARD(); + + bpf_iter_num_new(&it, 100, 200); + v = bpf_iter_num_next(&it); + bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1); + v = bpf_iter_num_next(&it); + bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1); + v = bpf_iter_num_next(&it); + bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1); + v = bpf_iter_num_next(&it); + bpf_printk("ITER_BASIC: E4 VAL: v=%d\n", v ? *v : -1); + bpf_iter_num_destroy(&it); + + return 0; +} + +SEC("raw_tp") +__success +int iter_multiple_sequential_loops(const void *ctx) +{ + struct bpf_iter_num it; + int *v, i; + + MY_PID_GUARD(); + + bpf_iter_num_new(&it, 0, 3); + while ((v = bpf_iter_num_next(&it))) { + bpf_printk("ITER_BASIC: E1 VAL: v=%d", *v); + } + bpf_iter_num_destroy(&it); + + bpf_iter_num_new(&it, 5, 10); + for (v = bpf_iter_num_next(&it); v; v = bpf_iter_num_next(&it)) { + bpf_printk("ITER_BASIC: E2 VAL: v=%d", *v); + } + bpf_iter_num_destroy(&it); + + bpf_iter_num_new(&it, 0, 2); +#pragma nounroll + for (i = 0; i < 3; i++) { + v = bpf_iter_num_next(&it); + bpf_printk("ITER_BASIC: E3 VAL: i=%d v=%d", i, v ? *v : -1); + } + bpf_iter_num_destroy(&it); + + bpf_iter_num_new(&it, 100, 200); + v = bpf_iter_num_next(&it); + bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1); + v = bpf_iter_num_next(&it); + bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1); + v = bpf_iter_num_next(&it); + bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1); + v = bpf_iter_num_next(&it); + bpf_printk("ITER_BASIC: E4 VAL: v=%d\n", v ? *v : -1); + bpf_iter_num_destroy(&it); + + return 0; +} + +SEC("raw_tp") +__success +int iter_limit_cond_break_loop(const void *ctx) +{ + struct bpf_iter_num it; + int *v, i = 0, sum = 0; + + MY_PID_GUARD(); + + bpf_iter_num_new(&it, 0, 10); + while ((v = bpf_iter_num_next(&it))) { + bpf_printk("ITER_SIMPLE: i=%d v=%d", i, *v); + sum += *v; + + i++; + if (i > 3) + break; + } + bpf_iter_num_destroy(&it); + + bpf_printk("ITER_SIMPLE: sum=%d\n", sum); + + return 0; +} + +SEC("raw_tp") +__success +int iter_obfuscate_counter(const void *ctx) +{ + struct bpf_iter_num it; + int *v, sum = 0; + /* Make i's initial value unknowable for verifier to prevent it from + * pruning if/else branch inside the loop body and marking i as precise. + */ + int i = zero; + + MY_PID_GUARD(); + + bpf_iter_num_new(&it, 0, 10); + while ((v = bpf_iter_num_next(&it))) { + int x; + + i += 1; + + /* If we initialized i as `int i = 0;` above, verifier would + * track that i becomes 1 on first iteration after increment + * above, and here verifier would eagerly prune else branch + * and mark i as precise, ruining open-coded iterator logic + * completely, as each next iteration would have a different + * *precise* value of i, and thus there would be no + * convergence of state. This would result in reaching maximum + * instruction limit, no matter what the limit is. 
+ */ + if (i == 1) + x = 123; + else + x = i * 3 + 1; + + bpf_printk("ITER_OBFUSCATE_COUNTER: i=%d v=%d x=%d", i, *v, x); + + sum += x; + } + bpf_iter_num_destroy(&it); + + bpf_printk("ITER_OBFUSCATE_COUNTER: sum=%d\n", sum); + + return 0; +} + +SEC("raw_tp") +__success +int iter_search_loop(const void *ctx) +{ + struct bpf_iter_num it; + int *v, *elem = NULL; + bool found = false; + + MY_PID_GUARD(); + + bpf_iter_num_new(&it, 0, 10); + + while ((v = bpf_iter_num_next(&it))) { + bpf_printk("ITER_SEARCH_LOOP: v=%d", *v); + + if (*v == 2) { + found = true; + elem = v; + barrier_var(elem); + } + } + + /* should fail to verify if bpf_iter_num_destroy() is here */ + + if (found) + /* here found element will be wrong, we should have copied + * value to a variable, but here we want to make sure we can + * access memory after the loop anyways + */ + bpf_printk("ITER_SEARCH_LOOP: FOUND IT = %d!\n", *elem); + else + bpf_printk("ITER_SEARCH_LOOP: NOT FOUND IT!\n"); + + bpf_iter_num_destroy(&it); + + return 0; +} + +SEC("raw_tp") +__success +int iter_array_fill(const void *ctx) +{ + int sum, i; + + MY_PID_GUARD(); + + bpf_for(i, 0, ARRAY_SIZE(arr)) { + arr[i] = i * 2; + } + + sum = 0; + bpf_for(i, 0, ARRAY_SIZE(arr)) { + sum += arr[i]; + } + + bpf_printk("ITER_ARRAY_FILL: sum=%d (should be %d)\n", sum, 255 * 256); + + return 0; +} + +static int arr2d[4][5]; +static int arr2d_row_sums[4]; +static int arr2d_col_sums[5]; + +SEC("raw_tp") +__success +int iter_nested_iters(const void *ctx) +{ + int sum, row, col; + + MY_PID_GUARD(); + + bpf_for(row, 0, ARRAY_SIZE(arr2d)) { + bpf_for( col, 0, ARRAY_SIZE(arr2d[0])) { + arr2d[row][col] = row * col; + } + } + + /* zero-initialize sums */ + sum = 0; + bpf_for(row, 0, ARRAY_SIZE(arr2d)) { + arr2d_row_sums[row] = 0; + } + bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) { + arr2d_col_sums[col] = 0; + } + + /* calculate sums */ + bpf_for(row, 0, ARRAY_SIZE(arr2d)) { + bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) { + sum += arr2d[row][col]; + arr2d_row_sums[row] += arr2d[row][col]; + arr2d_col_sums[col] += arr2d[row][col]; + } + } + + bpf_printk("ITER_NESTED_ITERS: total sum=%d", sum); + bpf_for(row, 0, ARRAY_SIZE(arr2d)) { + bpf_printk("ITER_NESTED_ITERS: row #%d sum=%d", row, arr2d_row_sums[row]); + } + bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) { + bpf_printk("ITER_NESTED_ITERS: col #%d sum=%d%s", + col, arr2d_col_sums[col], + col == ARRAY_SIZE(arr2d[0]) - 1 ? 
"\n" : ""); + } + + return 0; +} + +SEC("raw_tp") +__success +int iter_nested_deeply_iters(const void *ctx) +{ + int sum = 0; + + MY_PID_GUARD(); + + bpf_repeat(10) { + bpf_repeat(10) { + bpf_repeat(10) { + bpf_repeat(10) { + bpf_repeat(10) { + sum += 1; + } + } + } + } + /* validate that we can break from inside bpf_repeat() */ + break; + } + + return sum; +} + +static __noinline void fill_inner_dimension(int row) +{ + int col; + + bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) { + arr2d[row][col] = row * col; + } +} + +static __noinline int sum_inner_dimension(int row) +{ + int sum = 0, col; + + bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) { + sum += arr2d[row][col]; + arr2d_row_sums[row] += arr2d[row][col]; + arr2d_col_sums[col] += arr2d[row][col]; + } + + return sum; +} + +SEC("raw_tp") +__success +int iter_subprog_iters(const void *ctx) +{ + int sum, row, col; + + MY_PID_GUARD(); + + bpf_for(row, 0, ARRAY_SIZE(arr2d)) { + fill_inner_dimension(row); + } + + /* zero-initialize sums */ + sum = 0; + bpf_for(row, 0, ARRAY_SIZE(arr2d)) { + arr2d_row_sums[row] = 0; + } + bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) { + arr2d_col_sums[col] = 0; + } + + /* calculate sums */ + bpf_for(row, 0, ARRAY_SIZE(arr2d)) { + sum += sum_inner_dimension(row); + } + + bpf_printk("ITER_SUBPROG_ITERS: total sum=%d", sum); + bpf_for(row, 0, ARRAY_SIZE(arr2d)) { + bpf_printk("ITER_SUBPROG_ITERS: row #%d sum=%d", + row, arr2d_row_sums[row]); + } + bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) { + bpf_printk("ITER_SUBPROG_ITERS: col #%d sum=%d%s", + col, arr2d_col_sums[col], + col == ARRAY_SIZE(arr2d[0]) - 1 ? "\n" : ""); + } + + return 0; +} + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, int); + __uint(max_entries, 1000); +} arr_map SEC(".maps"); + +SEC("?raw_tp") +__failure __msg("invalid mem access 'scalar'") +int iter_err_too_permissive1(const void *ctx) +{ + int *map_val = NULL; + int key = 0; + + MY_PID_GUARD(); + + map_val = bpf_map_lookup_elem(&arr_map, &key); + if (!map_val) + return 0; + + bpf_repeat(1000000) { + map_val = NULL; + } + + *map_val = 123; + + return 0; +} + +SEC("?raw_tp") +__failure __msg("invalid mem access 'map_value_or_null'") +int iter_err_too_permissive2(const void *ctx) +{ + int *map_val = NULL; + int key = 0; + + MY_PID_GUARD(); + + map_val = bpf_map_lookup_elem(&arr_map, &key); + if (!map_val) + return 0; + + bpf_repeat(1000000) { + map_val = bpf_map_lookup_elem(&arr_map, &key); + } + + *map_val = 123; + + return 0; +} + +SEC("?raw_tp") +__failure __msg("invalid mem access 'map_value_or_null'") +int iter_err_too_permissive3(const void *ctx) +{ + int *map_val = NULL; + int key = 0; + bool found = false; + + MY_PID_GUARD(); + + bpf_repeat(1000000) { + map_val = bpf_map_lookup_elem(&arr_map, &key); + found = true; + } + + if (found) + *map_val = 123; + + return 0; +} + +SEC("raw_tp") +__success +int iter_tricky_but_fine(const void *ctx) +{ + int *map_val = NULL; + int key = 0; + bool found = false; + + MY_PID_GUARD(); + + bpf_repeat(1000000) { + map_val = bpf_map_lookup_elem(&arr_map, &key); + if (map_val) { + found = true; + break; + } + } + + if (found) + *map_val = 123; + + return 0; +} + +#define __bpf_memzero(p, sz) bpf_probe_read_kernel((p), (sz), 0) + +SEC("raw_tp") +__success +int iter_stack_array_loop(const void *ctx) +{ + long arr1[16], arr2[16], sum = 0; + int *v, i; + + MY_PID_GUARD(); + + /* zero-init arr1 and arr2 in such a way that verifier doesn't know + * it's all zeros; if we don't do that, we'll make BPF verifier track + * all combination of zero/non-zero 
stack slots for arr1/arr2, which + * will lead to O(2^(ARRAY_SIZE(arr1)+ARRAY_SIZE(arr2))) different + * states + */ + __bpf_memzero(arr1, sizeof(arr1)); + __bpf_memzero(arr2, sizeof(arr1)); + + /* validate that we can break and continue when using bpf_for() */ + bpf_for(i, 0, ARRAY_SIZE(arr1)) { + if (i & 1) { + arr1[i] = i; + continue; + } else { + arr2[i] = i; + break; + } + } + + bpf_for(i, 0, ARRAY_SIZE(arr1)) { + sum += arr1[i] + arr2[i]; + } + + return sum; +} + +static __noinline void fill(struct bpf_iter_num *it, int *arr, __u32 n, int mul) +{ + int *t, i; + + while ((t = bpf_iter_num_next(it))) { + i = *t; + if (i >= n) + break; + arr[i] = i * mul; + } +} + +static __noinline int sum(struct bpf_iter_num *it, int *arr, __u32 n) +{ + int *t, i, sum = 0;; + + while ((t = bpf_iter_num_next(it))) { + i = *t; + if (i >= n) + break; + sum += arr[i]; + } + + return sum; +} + +SEC("raw_tp") +__success +int iter_pass_iter_ptr_to_subprog(const void *ctx) +{ + int arr1[16], arr2[32]; + struct bpf_iter_num it; + int n, sum1, sum2; + + MY_PID_GUARD(); + + /* fill arr1 */ + n = ARRAY_SIZE(arr1); + bpf_iter_num_new(&it, 0, n); + fill(&it, arr1, n, 2); + bpf_iter_num_destroy(&it); + + /* fill arr2 */ + n = ARRAY_SIZE(arr2); + bpf_iter_num_new(&it, 0, n); + fill(&it, arr2, n, 10); + bpf_iter_num_destroy(&it); + + /* sum arr1 */ + n = ARRAY_SIZE(arr1); + bpf_iter_num_new(&it, 0, n); + sum1 = sum(&it, arr1, n); + bpf_iter_num_destroy(&it); + + /* sum arr2 */ + n = ARRAY_SIZE(arr2); + bpf_iter_num_new(&it, 0, n); + sum2 = sum(&it, arr2, n); + bpf_iter_num_destroy(&it); + + bpf_printk("sum1=%d, sum2=%d", sum1, sum2); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/iters_looping.c b/tools/testing/selftests/bpf/progs/iters_looping.c new file mode 100644 index 0000000000000..05fa5ce7fc594 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/iters_looping.c @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
*/ + +#include +#include +#include +#include +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +#define ITER_HELPERS \ + __imm(bpf_iter_num_new), \ + __imm(bpf_iter_num_next), \ + __imm(bpf_iter_num_destroy) + +SEC("?raw_tp") +__success +int force_clang_to_emit_btf_for_externs(void *ctx) +{ + /* we need this as a workaround to enforce compiler emitting BTF + * information for bpf_iter_num_{new,next,destroy}() kfuncs, + * as, apparently, it doesn't emit it for symbols only referenced from + * assembly (or cleanup attribute, for that matter, as well) + */ + bpf_repeat(0); + + return 0; +} + +SEC("?raw_tp") +__success +int consume_first_item_only(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + + /* consume first item */ + "r1 = %[iter];" + "call %[bpf_iter_num_next];" + + "if r0 == 0 goto +1;" + "r0 = *(u32 *)(r0 + 0);" + + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__failure __msg("R0 invalid mem access 'scalar'") +int missing_null_check_fail(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + + /* consume first element */ + "r1 = %[iter];" + "call %[bpf_iter_num_next];" + + /* FAIL: deref with no NULL check */ + "r1 = *(u32 *)(r0 + 0);" + + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__failure +__msg("invalid access to memory, mem_size=4 off=0 size=8") +__msg("R0 min value is outside of the allowed memory range") +int wrong_sized_read_fail(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + + /* consume first element */ + "r1 = %[iter];" + "call %[bpf_iter_num_next];" + + "if r0 == 0 goto +1;" + /* FAIL: deref more than available 4 bytes */ + "r0 = *(u64 *)(r0 + 0);" + + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__success __log_level(2) +__flag(BPF_F_TEST_STATE_FREQ) +int simplest_loop(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + "r6 = 0;" /* init sum */ + + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + + "1:" + /* consume next item */ + "r1 = %[iter];" + "call %[bpf_iter_num_next];" + + "if r0 == 0 goto 2f;" + "r0 = *(u32 *)(r0 + 0);" + "r6 += r0;" /* accumulate sum */ + "goto 1b;" + + "2:" + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common, "r6" + ); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/iters_state_safety.c b/tools/testing/selftests/bpf/progs/iters_state_safety.c new file mode 100644 index 0000000000000..d47e59aba6de3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/iters_state_safety.c @@ -0,0 +1,426 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Facebook */ + +#include +#include +#include +#include +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +#define ITER_HELPERS \ + __imm(bpf_iter_num_new), \ + __imm(bpf_iter_num_next), \ + 
__imm(bpf_iter_num_destroy) + +SEC("?raw_tp") +__success +int force_clang_to_emit_btf_for_externs(void *ctx) +{ + /* we need this as a workaround to enforce compiler emitting BTF + * information for bpf_iter_num_{new,next,destroy}() kfuncs, + * as, apparently, it doesn't emit it for symbols only referenced from + * assembly (or cleanup attribute, for that matter, as well) + */ + bpf_repeat(0); + + return 0; +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("fp-8_w=iter_num(ref_id=1,state=active,depth=0)") +int create_and_destroy(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__failure __msg("Unreleased reference id=1") +int create_and_forget_to_destroy_fail(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__failure __msg("expected an initialized iter_num as arg #1") +int destroy_without_creating_fail(void *ctx) +{ + /* init with zeros to stop verifier complaining about uninit stack */ + struct bpf_iter_num iter; + + asm volatile ( + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__failure __msg("expected an initialized iter_num as arg #1") +int compromise_iter_w_direct_write_fail(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + + /* directly write over first half of iter state */ + "*(u64 *)(%[iter] + 0) = r0;" + + /* (attempt to) destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__failure __msg("Unreleased reference id=1") +int compromise_iter_w_direct_write_and_skip_destroy_fail(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + + /* directly write over first half of iter state */ + "*(u64 *)(%[iter] + 0) = r0;" + + /* don't destroy iter, leaking ref, which should fail */ + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__failure __msg("expected an initialized iter_num as arg #1") +int compromise_iter_w_helper_write_fail(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + + /* overwrite 8th byte with bpf_probe_read_kernel() */ + "r1 = %[iter];" + "r1 += 7;" + "r2 = 1;" + "r3 = 0;" /* NULL */ + "call %[bpf_probe_read_kernel];" + + /* (attempt to) destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS, __imm(bpf_probe_read_kernel) + : __clobber_common + ); + + return 0; +} + +static __noinline void subprog_with_iter(void) +{ + struct bpf_iter_num iter; + + bpf_iter_num_new(&iter, 0, 1); + + return; +} + +SEC("?raw_tp") +__failure +/* ensure there was a call to subprog, which might happen without __noinline */ +__msg("returning from callee:") 
+__msg("Unreleased reference id=1") +int leak_iter_from_subprog_fail(void *ctx) +{ + subprog_with_iter(); + + return 0; +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("fp-8_w=iter_num(ref_id=1,state=active,depth=0)") +int valid_stack_reuse(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + + /* now reuse same stack slots */ + + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__failure __msg("expected uninitialized iter_num as arg #1") +int double_create_fail(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + /* (attempt to) create iterator again */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__failure __msg("expected an initialized iter_num as arg #1") +int double_destroy_fail(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + /* (attempt to) destroy iterator again */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__failure __msg("expected an initialized iter_num as arg #1") +int next_without_new_fail(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* don't create iterator and try to iterate*/ + "r1 = %[iter];" + "call %[bpf_iter_num_next];" + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__failure __msg("expected an initialized iter_num as arg #1") +int next_after_destroy_fail(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + /* don't create iterator and try to iterate*/ + "r1 = %[iter];" + "call %[bpf_iter_num_next];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__failure __msg("invalid read from stack") +int __naked read_from_iter_slot_fail(void) +{ + asm volatile ( + /* r6 points to struct bpf_iter_num on the stack */ + "r6 = r10;" + "r6 += -24;" + + /* create iterator */ + "r1 = r6;" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + + /* attemp to leak bpf_iter_num state */ + "r7 = *(u64 *)(r6 + 0);" + "r8 = *(u64 *)(r6 + 8);" + + /* destroy iterator */ + "r1 = r6;" + "call %[bpf_iter_num_destroy];" + + /* leak bpf_iter_num state */ + "r0 = r7;" + "if r7 > r8 goto +1;" + "r0 = r8;" + "exit;" + : + : ITER_HELPERS + : __clobber_common, "r6", "r7", "r8" + ); +} + +int zero; + +SEC("?raw_tp") +__failure +__flag(BPF_F_TEST_STATE_FREQ) +__msg("Unreleased reference") +int 
stacksafe_should_not_conflate_stack_spill_and_iter(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* Create a fork in logic, with general setup as follows: + * - fallthrough (first) path is valid; + * - branch (second) path is invalid. + * Then depending on what we do in fallthrough vs branch path, + * we try to detect bugs in func_states_equal(), regsafe(), + * refsafe(), stack_safe(), and similar by tricking verifier + * into believing that branch state is a valid subset of + * a fallthrough state. Verifier should reject overall + * validation, unless there is a bug somewhere in verifier + * logic. + */ + "call %[bpf_get_prandom_u32];" + "r6 = r0;" + "call %[bpf_get_prandom_u32];" + "r7 = r0;" + + "if r6 > r7 goto bad;" /* fork */ + + /* spill r6 into stack slot of bpf_iter_num var */ + "*(u64 *)(%[iter] + 0) = r6;" + + "goto skip_bad;" + + "bad:" + /* create iterator in the same stack slot */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + + /* but then forget about it and overwrite it back to r6 spill */ + "*(u64 *)(%[iter] + 0) = r6;" + + "skip_bad:" + "goto +0;" /* force checkpoint */ + + /* corrupt stack slots, if they are really dynptr */ + "*(u64 *)(%[iter] + 0) = r6;" + : + : __imm_ptr(iter), + __imm_addr(zero), + __imm(bpf_get_prandom_u32), + __imm(bpf_dynptr_from_mem), + ITER_HELPERS + : __clobber_common, "r6", "r7" + ); + + return 0; +} -- GitLab From f59b146092653bcf014ccdc9bd8bc94e79065ce3 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 8 Mar 2023 10:41:20 -0800 Subject: [PATCH 0255/2078] selftests/bpf: add number iterator tests Add number iterator (bpf_iter_num_{new,next,destroy}()) tests, validating the correct handling of various corner and common cases *at runtime*. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230308184121.1165081-8-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/iters.c | 49 ++++ tools/testing/selftests/bpf/progs/iters_num.c | 242 ++++++++++++++++++ 2 files changed, 291 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/iters_num.c diff --git a/tools/testing/selftests/bpf/prog_tests/iters.c b/tools/testing/selftests/bpf/prog_tests/iters.c index 414fb8d82145a..2e7caff9523e1 100644 --- a/tools/testing/selftests/bpf/prog_tests/iters.c +++ b/tools/testing/selftests/bpf/prog_tests/iters.c @@ -6,10 +6,59 @@ #include "iters.skel.h" #include "iters_state_safety.skel.h" #include "iters_looping.skel.h" +#include "iters_num.skel.h" + +static void subtest_num_iters(void) +{ + struct iters_num *skel; + int err; + + skel = iters_num__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + err = iters_num__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto cleanup; + + usleep(1); + iters_num__detach(skel); + +#define VALIDATE_CASE(case_name) \ + ASSERT_EQ(skel->bss->res_##case_name, \ + skel->rodata->exp_##case_name, \ + #case_name) + + VALIDATE_CASE(empty_zero); + VALIDATE_CASE(empty_int_min); + VALIDATE_CASE(empty_int_max); + VALIDATE_CASE(empty_minus_one); + + VALIDATE_CASE(simple_sum); + VALIDATE_CASE(neg_sum); + VALIDATE_CASE(very_neg_sum); + VALIDATE_CASE(neg_pos_sum); + + VALIDATE_CASE(invalid_range); + VALIDATE_CASE(max_range); + VALIDATE_CASE(e2big_range); + + VALIDATE_CASE(succ_elem_cnt); + VALIDATE_CASE(overfetched_elem_cnt); + VALIDATE_CASE(fail_elem_cnt); + +#undef VALIDATE_CASE + +cleanup: + iters_num__destroy(skel); +} void test_iters(void) { RUN_TESTS(iters_state_safety); 
RUN_TESTS(iters_looping); RUN_TESTS(iters); + + if (test__start_subtest("num")) + subtest_num_iters(); } diff --git a/tools/testing/selftests/bpf/progs/iters_num.c b/tools/testing/selftests/bpf/progs/iters_num.c new file mode 100644 index 0000000000000..7a77a8daee0dc --- /dev/null +++ b/tools/testing/selftests/bpf/progs/iters_num.c @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include "vmlinux.h" +#include +#include "bpf_misc.h" + +const volatile __s64 exp_empty_zero = 0 + 1; +__s64 res_empty_zero; + +SEC("raw_tp/sys_enter") +int num_empty_zero(const void *ctx) +{ + __s64 sum = 0, i; + + bpf_for(i, 0, 0) sum += i; + res_empty_zero = 1 + sum; + + return 0; +} + +const volatile __s64 exp_empty_int_min = 0 + 2; +__s64 res_empty_int_min; + +SEC("raw_tp/sys_enter") +int num_empty_int_min(const void *ctx) +{ + __s64 sum = 0, i; + + bpf_for(i, INT_MIN, INT_MIN) sum += i; + res_empty_int_min = 2 + sum; + + return 0; +} + +const volatile __s64 exp_empty_int_max = 0 + 3; +__s64 res_empty_int_max; + +SEC("raw_tp/sys_enter") +int num_empty_int_max(const void *ctx) +{ + __s64 sum = 0, i; + + bpf_for(i, INT_MAX, INT_MAX) sum += i; + res_empty_int_max = 3 + sum; + + return 0; +} + +const volatile __s64 exp_empty_minus_one = 0 + 4; +__s64 res_empty_minus_one; + +SEC("raw_tp/sys_enter") +int num_empty_minus_one(const void *ctx) +{ + __s64 sum = 0, i; + + bpf_for(i, -1, -1) sum += i; + res_empty_minus_one = 4 + sum; + + return 0; +} + +const volatile __s64 exp_simple_sum = 9 * 10 / 2; +__s64 res_simple_sum; + +SEC("raw_tp/sys_enter") +int num_simple_sum(const void *ctx) +{ + __s64 sum = 0, i; + + bpf_for(i, 0, 10) sum += i; + res_simple_sum = sum; + + return 0; +} + +const volatile __s64 exp_neg_sum = -11 * 10 / 2; +__s64 res_neg_sum; + +SEC("raw_tp/sys_enter") +int num_neg_sum(const void *ctx) +{ + __s64 sum = 0, i; + + bpf_for(i, -10, 0) sum += i; + res_neg_sum = sum; + + return 0; +} + +const volatile __s64 exp_very_neg_sum = INT_MIN + (__s64)(INT_MIN + 1); +__s64 res_very_neg_sum; + +SEC("raw_tp/sys_enter") +int num_very_neg_sum(const void *ctx) +{ + __s64 sum = 0, i; + + bpf_for(i, INT_MIN, INT_MIN + 2) sum += i; + res_very_neg_sum = sum; + + return 0; +} + +const volatile __s64 exp_very_big_sum = (__s64)(INT_MAX - 1) + (__s64)(INT_MAX - 2); +__s64 res_very_big_sum; + +SEC("raw_tp/sys_enter") +int num_very_big_sum(const void *ctx) +{ + __s64 sum = 0, i; + + bpf_for(i, INT_MAX - 2, INT_MAX) sum += i; + res_very_big_sum = sum; + + return 0; +} + +const volatile __s64 exp_neg_pos_sum = -3; +__s64 res_neg_pos_sum; + +SEC("raw_tp/sys_enter") +int num_neg_pos_sum(const void *ctx) +{ + __s64 sum = 0, i; + + bpf_for(i, -3, 3) sum += i; + res_neg_pos_sum = sum; + + return 0; +} + +const volatile __s64 exp_invalid_range = -EINVAL; +__s64 res_invalid_range; + +SEC("raw_tp/sys_enter") +int num_invalid_range(const void *ctx) +{ + struct bpf_iter_num it; + + res_invalid_range = bpf_iter_num_new(&it, 1, 0); + bpf_iter_num_destroy(&it); + + return 0; +} + +const volatile __s64 exp_max_range = 0 + 10; +__s64 res_max_range; + +SEC("raw_tp/sys_enter") +int num_max_range(const void *ctx) +{ + struct bpf_iter_num it; + + res_max_range = 10 + bpf_iter_num_new(&it, 0, BPF_MAX_LOOPS); + bpf_iter_num_destroy(&it); + + return 0; +} + +const volatile __s64 exp_e2big_range = -E2BIG; +__s64 res_e2big_range; + +SEC("raw_tp/sys_enter") +int num_e2big_range(const void *ctx) +{ + struct bpf_iter_num it; + + res_e2big_range = 
bpf_iter_num_new(&it, -1, BPF_MAX_LOOPS); + bpf_iter_num_destroy(&it); + + return 0; +} + +const volatile __s64 exp_succ_elem_cnt = 10; +__s64 res_succ_elem_cnt; + +SEC("raw_tp/sys_enter") +int num_succ_elem_cnt(const void *ctx) +{ + struct bpf_iter_num it; + int cnt = 0, *v; + + bpf_iter_num_new(&it, 0, 10); + while ((v = bpf_iter_num_next(&it))) { + cnt++; + } + bpf_iter_num_destroy(&it); + + res_succ_elem_cnt = cnt; + + return 0; +} + +const volatile __s64 exp_overfetched_elem_cnt = 5; +__s64 res_overfetched_elem_cnt; + +SEC("raw_tp/sys_enter") +int num_overfetched_elem_cnt(const void *ctx) +{ + struct bpf_iter_num it; + int cnt = 0, *v, i; + + bpf_iter_num_new(&it, 0, 5); + for (i = 0; i < 10; i++) { + v = bpf_iter_num_next(&it); + if (v) + cnt++; + } + bpf_iter_num_destroy(&it); + + res_overfetched_elem_cnt = cnt; + + return 0; +} + +const volatile __s64 exp_fail_elem_cnt = 20 + 0; +__s64 res_fail_elem_cnt; + +SEC("raw_tp/sys_enter") +int num_fail_elem_cnt(const void *ctx) +{ + struct bpf_iter_num it; + int cnt = 0, *v, i; + + bpf_iter_num_new(&it, 100, 10); + for (i = 0; i < 10; i++) { + v = bpf_iter_num_next(&it); + if (v) + cnt++; + } + bpf_iter_num_destroy(&it); + + res_fail_elem_cnt = 20 + cnt; + + return 0; +} + +char _license[] SEC("license") = "GPL"; -- GitLab From 7e86a8c4ac8d5dcf7dd58f5a4779d1a6ff0a827d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 8 Mar 2023 10:41:21 -0800 Subject: [PATCH 0256/2078] selftests/bpf: implement and test custom testmod_seq iterator Implement a trivial iterator returning same specified integer value N times as part of bpf_testmod kernel module. Add selftests to validate everything works end to end. We also reuse these tests as "verification-only" tests to validate that kernel prints the state of custom kernel module-defined iterator correctly: fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0) "testmod_seq" part is an iterator type, and is coming from module's BTF data dynamically at runtime. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230308184121.1165081-9-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/DENYLIST.s390x | 1 + .../selftests/bpf/bpf_testmod/bpf_testmod.c | 42 +++++++++- .../selftests/bpf/bpf_testmod/bpf_testmod.h | 6 ++ .../testing/selftests/bpf/prog_tests/iters.c | 42 ++++++++++ .../selftests/bpf/progs/iters_testmod_seq.c | 79 +++++++++++++++++++ 5 files changed, 169 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/progs/iters_testmod_seq.c diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index a02a085e7f32b..34cb8b2de8ca3 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -8,6 +8,7 @@ dynptr/test_dynptr_skb_data dynptr/test_skb_readonly fexit_sleep # fexit_skel_load fexit skeleton failed (trampoline) get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace) +iters/testmod_seq* # s390x doesn't support kfuncs in modules yet kprobe_multi_bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95 kprobe_multi_test # relies on fentry ksyms_module # test_ksyms_module__open_and_load unexpected error: -9 (?) 
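For reference, the selftest added below exercises this module-defined iterator through the bpf_for_each() convenience macro; the same new/next/destroy contract can also be driven open-coded. A minimal sketch, assuming the opaque struct and extern __ksym declarations from iters_testmod_seq.c further down; the program name, section, and the 42/3 values are illustrative only:

SEC("raw_tp/sys_enter")
int testmod_seq_open_coded(const void *ctx)
{
        struct bpf_iter_testmod_seq it;
        __s64 sum = 0, *v;

        /* ask the module-defined iterator to yield the value 42 three times */
        bpf_iter_testmod_seq_new(&it, 42, 3);

        /* next() returns a pointer to the value, or NULL once cnt is exhausted */
        while ((v = bpf_iter_testmod_seq_next(&it)))
                sum += *v;

        /* pair every new() with destroy(); the verifier enforces this */
        bpf_iter_testmod_seq_destroy(&it);

        return sum == 3 * 42 ? 0 : 1;
}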
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 46500636d8cd9..5e6e85c8d77de 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -65,6 +65,34 @@ bpf_testmod_test_mod_kfunc(int i) *(int *)this_cpu_ptr(&bpf_testmod_ksym_percpu) = i; } +__bpf_kfunc int bpf_iter_testmod_seq_new(struct bpf_iter_testmod_seq *it, s64 value, int cnt) +{ + if (cnt < 0) { + it->cnt = 0; + return -EINVAL; + } + + it->value = value; + it->cnt = cnt; + + return 0; +} + +__bpf_kfunc s64 *bpf_iter_testmod_seq_next(struct bpf_iter_testmod_seq* it) +{ + if (it->cnt <= 0) + return NULL; + + it->cnt--; + + return &it->value; +} + +__bpf_kfunc void bpf_iter_testmod_seq_destroy(struct bpf_iter_testmod_seq *it) +{ + it->cnt = 0; +} + struct bpf_testmod_btf_type_tag_1 { int a; }; @@ -220,6 +248,17 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = { .write = bpf_testmod_test_write, }; +BTF_SET8_START(bpf_testmod_common_kfunc_ids) +BTF_ID_FLAGS(func, bpf_iter_testmod_seq_new, KF_ITER_NEW) +BTF_ID_FLAGS(func, bpf_iter_testmod_seq_next, KF_ITER_NEXT | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_iter_testmod_seq_destroy, KF_ITER_DESTROY) +BTF_SET8_END(bpf_testmod_common_kfunc_ids) + +static const struct btf_kfunc_id_set bpf_testmod_common_kfunc_set = { + .owner = THIS_MODULE, + .set = &bpf_testmod_common_kfunc_ids, +}; + BTF_SET8_START(bpf_testmod_check_kfunc_ids) BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc) BTF_SET8_END(bpf_testmod_check_kfunc_ids) @@ -235,7 +274,8 @@ static int bpf_testmod_init(void) { int ret; - ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_testmod_kfunc_set); + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &bpf_testmod_common_kfunc_set); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_testmod_kfunc_set); if (ret < 0) return ret; if (bpf_fentry_test1(0) < 0) diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h index 0d71e2607832e..f32793efe095b 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h @@ -22,4 +22,10 @@ struct bpf_testmod_test_writable_ctx { int val; }; +/* BPF iter that returns *value* *n* times in a row */ +struct bpf_iter_testmod_seq { + s64 value; + int cnt; +}; + #endif /* _BPF_TESTMOD_H */ diff --git a/tools/testing/selftests/bpf/prog_tests/iters.c b/tools/testing/selftests/bpf/prog_tests/iters.c index 2e7caff9523e1..10804ae5ae972 100644 --- a/tools/testing/selftests/bpf/prog_tests/iters.c +++ b/tools/testing/selftests/bpf/prog_tests/iters.c @@ -7,6 +7,7 @@ #include "iters_state_safety.skel.h" #include "iters_looping.skel.h" #include "iters_num.skel.h" +#include "iters_testmod_seq.skel.h" static void subtest_num_iters(void) { @@ -53,12 +54,53 @@ cleanup: iters_num__destroy(skel); } +static void subtest_testmod_seq_iters(void) +{ + struct iters_testmod_seq *skel; + int err; + + if (!env.has_testmod) { + test__skip(); + return; + } + + skel = iters_testmod_seq__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + err = iters_testmod_seq__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto cleanup; + + usleep(1); + iters_testmod_seq__detach(skel); + +#define VALIDATE_CASE(case_name) \ + ASSERT_EQ(skel->bss->res_##case_name, \ + skel->rodata->exp_##case_name, \ + #case_name) + + VALIDATE_CASE(empty); + 
VALIDATE_CASE(full); + VALIDATE_CASE(truncated); + +#undef VALIDATE_CASE + +cleanup: + iters_testmod_seq__destroy(skel); +} + void test_iters(void) { RUN_TESTS(iters_state_safety); RUN_TESTS(iters_looping); RUN_TESTS(iters); + if (env.has_testmod) + RUN_TESTS(iters_testmod_seq); + if (test__start_subtest("num")) subtest_num_iters(); + if (test__start_subtest("testmod_seq")) + subtest_testmod_seq_iters(); } diff --git a/tools/testing/selftests/bpf/progs/iters_testmod_seq.c b/tools/testing/selftests/bpf/progs/iters_testmod_seq.c new file mode 100644 index 0000000000000..3873fb6c292a1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/iters_testmod_seq.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include +#include "bpf_misc.h" + +struct bpf_iter_testmod_seq { + u64 :64; + u64 :64; +}; + +extern int bpf_iter_testmod_seq_new(struct bpf_iter_testmod_seq *it, s64 value, int cnt) __ksym; +extern s64 *bpf_iter_testmod_seq_next(struct bpf_iter_testmod_seq *it) __ksym; +extern void bpf_iter_testmod_seq_destroy(struct bpf_iter_testmod_seq *it) __ksym; + +const volatile __s64 exp_empty = 0 + 1; +__s64 res_empty; + +SEC("raw_tp/sys_enter") +__success __log_level(2) +__msg("fp-16_w=iter_testmod_seq(ref_id=1,state=active,depth=0)") +__msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)") +__msg("call bpf_iter_testmod_seq_destroy") +int testmod_seq_empty(const void *ctx) +{ + __s64 sum = 0, *i; + + bpf_for_each(testmod_seq, i, 1000, 0) sum += *i; + res_empty = 1 + sum; + + return 0; +} + +const volatile __s64 exp_full = 1000000; +__s64 res_full; + +SEC("raw_tp/sys_enter") +__success __log_level(2) +__msg("fp-16_w=iter_testmod_seq(ref_id=1,state=active,depth=0)") +__msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)") +__msg("call bpf_iter_testmod_seq_destroy") +int testmod_seq_full(const void *ctx) +{ + __s64 sum = 0, *i; + + bpf_for_each(testmod_seq, i, 1000, 1000) sum += *i; + res_full = sum; + + return 0; +} + +const volatile __s64 exp_truncated = 10 * 1000000; +__s64 res_truncated; + +static volatile int zero = 0; + +SEC("raw_tp/sys_enter") +__success __log_level(2) +__msg("fp-16_w=iter_testmod_seq(ref_id=1,state=active,depth=0)") +__msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)") +__msg("call bpf_iter_testmod_seq_destroy") +int testmod_seq_truncated(const void *ctx) +{ + __s64 sum = 0, *i; + int cnt = zero; + + bpf_for_each(testmod_seq, i, 10, 2000000) { + sum += *i; + cnt++; + if (cnt >= 1000000) + break; + } + res_truncated = sum; + + return 0; +} + +char _license[] SEC("license") = "GPL"; -- GitLab From 6bf0ad7f29173869de6a5a359554426a7127c247 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 7 Mar 2023 17:30:35 +0100 Subject: [PATCH 0257/2078] ravb: remove R-Car H3 ES1.* handling R-Car H3 ES1.* was only available to an internal development group and needed a lot of quirks and workarounds. These become a maintenance burden now, so our development group decided to remove upstream support and disable booting for this SoC. Public users only have ES2 onwards. 
Reviewed-by: Laurent Pinchart Reviewed-by: Geert Uytterhoeven Signed-off-by: Wolfram Sang Signed-off-by: Simon Horman Reviewed-by: Sergey Shtylyov Link: https://lore.kernel.org/all/20230307163041.3815-8-wsa+renesas@sang-engineering.com/ Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/ravb_main.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 0f54849a38235..b81f0d8dfda81 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include @@ -1390,11 +1389,6 @@ static void ravb_adjust_link(struct net_device *ndev) phy_print_status(phydev); } -static const struct soc_device_attribute r8a7795es10[] = { - { .soc_id = "r8a7795", .revision = "ES1.0", }, - { /* sentinel */ } -}; - /* PHY init function */ static int ravb_phy_init(struct net_device *ndev) { @@ -1434,15 +1428,6 @@ static int ravb_phy_init(struct net_device *ndev) goto err_deregister_fixed_link; } - /* This driver only support 10/100Mbit speeds on R-Car H3 ES1.0 - * at this time. - */ - if (soc_device_match(r8a7795es10)) { - phy_set_max_speed(phydev, SPEED_100); - - netdev_info(ndev, "limited PHY to 100Mbit/s\n"); - } - if (!info->half_duplex) { /* 10BASE, Pause and Asym Pause is not supported */ phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_10baseT_Half_BIT); -- GitLab From 1de2a84dd06091cee943ab796660ba64c8479d33 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 12:19:12 -0600 Subject: [PATCH 0258/2078] alx: Drop redundant pci_enable_pcie_error_reporting() pci_enable_pcie_error_reporting() enables the device to send ERR_* Messages. Since f26e58bf6f54 ("PCI/AER: Enable error reporting when AER is native"), the PCI core does this for all devices during enumeration, so the driver doesn't need to do it itself. Remove the redundant pci_enable_pcie_error_reporting() call from the driver. Also remove the corresponding pci_disable_pcie_error_reporting() from the driver .remove() path. Note that this only controls ERR_* Messages from the device. An ERR_* Message may cause the Root Port to generate an interrupt, depending on the AER Root Error Command register managed by the AER service driver. 
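To make this recurring rationale concrete, here is a minimal sketch of the probe/remove pattern that this and the following patches delete from each driver; the foo_* driver is hypothetical and error handling is trimmed:

/* Hypothetical PCI driver, before this series */
#include <linux/aer.h>
#include <linux/pci.h>

static int foo_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
        int err;

        err = pci_enable_device(pdev);
        if (err)
                return err;

        /* Redundant since f26e58bf6f54: the PCI core already enables
         * ERR_* Message generation during enumeration when AER is native,
         * so this call (and the linux/aer.h include) can simply go away.
         */
        pci_enable_pcie_error_reporting(pdev);

        pci_set_master(pdev);
        return 0;
}

static void foo_remove(struct pci_dev *pdev)
{
        /* ...as can the matching disable call on teardown */
        pci_disable_pcie_error_reporting(pdev);
        pci_disable_device(pdev);
}

The same reasoning applies to each "Drop redundant pci_enable_pcie_error_reporting()" patch that follows in this series.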
Signed-off-by: Bjorn Helgaas Cc: Chris Snook Acked-by: Jesse Brandeburg Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/atheros/alx/main.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index 306393f8eecaa..49bb9a8f00e64 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include #include @@ -1745,7 +1744,6 @@ static int alx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto out_pci_disable; } - pci_enable_pcie_error_reporting(pdev); pci_set_master(pdev); if (!pdev->pm_cap) { @@ -1879,7 +1877,6 @@ out_free_netdev: free_netdev(netdev); out_pci_release: pci_release_mem_regions(pdev); - pci_disable_pcie_error_reporting(pdev); out_pci_disable: pci_disable_device(pdev); return err; @@ -1897,7 +1894,6 @@ static void alx_remove(struct pci_dev *pdev) iounmap(hw->hw_addr); pci_release_mem_regions(pdev); - pci_disable_pcie_error_reporting(pdev); pci_disable_device(pdev); mutex_destroy(&alx->mtx); -- GitLab From b4e24578b4841b6abac38fd40fddba6caa9a656a Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 12:19:13 -0600 Subject: [PATCH 0259/2078] be2net: Drop redundant pci_enable_pcie_error_reporting() pci_enable_pcie_error_reporting() enables the device to send ERR_* Messages. Since f26e58bf6f54 ("PCI/AER: Enable error reporting when AER is native"), the PCI core does this for all devices during enumeration, so the driver doesn't need to do it itself. Remove the redundant pci_enable_pcie_error_reporting() call from the driver. Also remove the corresponding pci_disable_pcie_error_reporting() from the driver .remove() path. Note that this only controls ERR_* Messages from the device. An ERR_* Message may cause the Root Port to generate an interrupt, depending on the AER Root Error Command register managed by the AER service driver. 
Signed-off-by: Bjorn Helgaas Cc: Ajit Khaparde Cc: Sriharsha Basavapatna Cc: Somnath Kotur Acked-by: Jesse Brandeburg Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/emulex/benet/be_main.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 46fe3d74e2e98..aed1b622f51f9 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -16,7 +16,6 @@ #include "be.h" #include "be_cmds.h" #include -#include #include #include #include @@ -5726,8 +5725,6 @@ static void be_remove(struct pci_dev *pdev) be_unmap_pci_bars(adapter); be_drv_cleanup(adapter); - pci_disable_pcie_error_reporting(pdev); - pci_release_regions(pdev); pci_disable_device(pdev); @@ -5845,10 +5842,6 @@ static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id) goto free_netdev; } - status = pci_enable_pcie_error_reporting(pdev); - if (!status) - dev_info(&pdev->dev, "PCIe error reporting enabled\n"); - status = be_map_pci_bars(adapter); if (status) goto free_netdev; @@ -5893,7 +5886,6 @@ drv_cleanup: unmap_bars: be_unmap_pci_bars(adapter); free_netdev: - pci_disable_pcie_error_reporting(pdev); free_netdev(netdev); rel_reg: pci_release_regions(pdev); -- GitLab From 5f00358b5e905a1db1077006d01cd5d207f91fb0 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 12:19:14 -0600 Subject: [PATCH 0260/2078] bnx2: Drop redundant pci_enable_pcie_error_reporting() pci_enable_pcie_error_reporting() enables the device to send ERR_* Messages. Since f26e58bf6f54 ("PCI/AER: Enable error reporting when AER is native"), the PCI core does this for all devices during enumeration, so the driver doesn't need to do it itself. Remove the redundant pci_enable_pcie_error_reporting() call from the driver. Also remove the corresponding pci_disable_pcie_error_reporting() from the driver .remove() path. Note that this only controls ERR_* Messages from the device. An ERR_* Message may cause the Root Port to generate an interrupt, depending on the AER Root Error Command register managed by the AER service driver. cd709aa90648 ("bnx2: Add PCI Advanced Error Reporting support.") added pci_enable_pcie_error_reporting() for all devices, and c239f279e571 ("bnx2: Enable AER on PCIE devices only") restricted it to BNX2_CHIP_5709 devices to avoid an error message when it failed on non-PCIe devices. The PCI core only enables PCIe error reporting on PCIe devices, which I assume means BNX2_CHIP_5709. 
Signed-off-by: Bjorn Helgaas Cc: Rasesh Mody Cc: GR-Linux-NIC-Dev@marvell.com Reviewed-by: Michael Chan Acked-by: Jesse Brandeburg Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnx2.c | 21 --------------------- drivers/net/ethernet/broadcom/bnx2.h | 1 - 2 files changed, 22 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index 9f473854b0f44..a66137b8d1a63 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -48,7 +48,6 @@ #include #include #include -#include #include #if IS_ENABLED(CONFIG_CNIC) @@ -8093,7 +8092,6 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev) int rc, i, j; u32 reg; u64 dma_mask, persist_dma_mask; - int err; SET_NETDEV_DEV(dev, &pdev->dev); bp = netdev_priv(dev); @@ -8176,12 +8174,6 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev) bp->flags |= BNX2_FLAG_PCIE; if (BNX2_CHIP_REV(bp) == BNX2_CHIP_REV_Ax) bp->flags |= BNX2_FLAG_JUMBO_BROKEN; - - /* AER (Advanced Error Reporting) hooks */ - err = pci_enable_pcie_error_reporting(pdev); - if (!err) - bp->flags |= BNX2_FLAG_AER_ENABLED; - } else { bp->pcix_cap = pci_find_capability(pdev, PCI_CAP_ID_PCIX); if (bp->pcix_cap == 0) { @@ -8460,11 +8452,6 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev) return 0; err_out_unmap: - if (bp->flags & BNX2_FLAG_AER_ENABLED) { - pci_disable_pcie_error_reporting(pdev); - bp->flags &= ~BNX2_FLAG_AER_ENABLED; - } - pci_iounmap(pdev, bp->regview); bp->regview = NULL; @@ -8638,11 +8625,6 @@ bnx2_remove_one(struct pci_dev *pdev) bnx2_free_stats_blk(dev); kfree(bp->temp_stats_blk); - if (bp->flags & BNX2_FLAG_AER_ENABLED) { - pci_disable_pcie_error_reporting(pdev); - bp->flags &= ~BNX2_FLAG_AER_ENABLED; - } - bnx2_release_firmware(bp); free_netdev(dev); @@ -8766,9 +8748,6 @@ static pci_ers_result_t bnx2_io_slot_reset(struct pci_dev *pdev) } rtnl_unlock(); - if (!(bp->flags & BNX2_FLAG_AER_ENABLED)) - return result; - return result; } diff --git a/drivers/net/ethernet/broadcom/bnx2.h b/drivers/net/ethernet/broadcom/bnx2.h index a09ec47461c90..315b08c64edd8 100644 --- a/drivers/net/ethernet/broadcom/bnx2.h +++ b/drivers/net/ethernet/broadcom/bnx2.h @@ -6808,7 +6808,6 @@ struct bnx2 { #define BNX2_FLAG_JUMBO_BROKEN 0x00000800 #define BNX2_FLAG_CAN_KEEP_VLAN 0x00001000 #define BNX2_FLAG_BROKEN_STATS 0x00002000 -#define BNX2_FLAG_AER_ENABLED 0x00004000 struct bnx2_napi bnx2_napi[BNX2_MAX_MSIX_VEC]; -- GitLab From 2fba753cc9b57d82800db0997b9271898323c57e Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 12:19:15 -0600 Subject: [PATCH 0261/2078] bnx2x: Drop redundant pci_enable_pcie_error_reporting() pci_enable_pcie_error_reporting() enables the device to send ERR_* Messages. Since f26e58bf6f54 ("PCI/AER: Enable error reporting when AER is native"), the PCI core does this for all devices during enumeration, so the driver doesn't need to do it itself. Remove the redundant pci_enable_pcie_error_reporting() call from the driver. Also remove the corresponding pci_disable_pcie_error_reporting() from the driver .remove() path. Note that this only controls ERR_* Messages from the device. An ERR_* Message may cause the Root Port to generate an interrupt, depending on the AER Root Error Command register managed by the AER service driver. 
Signed-off-by: Bjorn Helgaas Cc: Ariel Elior Cc: Sudarsana Kalluru Cc: Manish Chopra Acked-by: Jesse Brandeburg Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 1 - .../net/ethernet/broadcom/bnx2x/bnx2x_main.c | 19 ------------------- 2 files changed, 20 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index dd5945c4bfec2..8bcde0a6e011c 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -1486,7 +1486,6 @@ struct bnx2x { #define IS_VF_FLAG (1 << 22) #define BC_SUPPORTS_RMMOD_CMD (1 << 23) #define HAS_PHYS_PORT_ID (1 << 24) -#define AER_ENABLED (1 << 25) #define PTP_SUPPORTED (1 << 26) #define TX_TIMESTAMPING_EN (1 << 27) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 5d1e4fe335aaa..3bb5ea570c876 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include @@ -13037,14 +13036,6 @@ static const struct net_device_ops bnx2x_netdev_ops = { .ndo_features_check = bnx2x_features_check, }; -static void bnx2x_disable_pcie_error_reporting(struct bnx2x *bp) -{ - if (bp->flags & AER_ENABLED) { - pci_disable_pcie_error_reporting(bp->pdev); - bp->flags &= ~AER_ENABLED; - } -} - static int bnx2x_init_dev(struct bnx2x *bp, struct pci_dev *pdev, struct net_device *dev, unsigned long board_type) { @@ -13157,13 +13148,6 @@ static int bnx2x_init_dev(struct bnx2x *bp, struct pci_dev *pdev, /* Set PCIe reset type to fundamental for EEH recovery */ pdev->needs_freset = 1; - /* AER (Advanced Error reporting) configuration */ - rc = pci_enable_pcie_error_reporting(pdev); - if (!rc) - bp->flags |= AER_ENABLED; - else - BNX2X_DEV_INFO("Failed To configure PCIe AER [%d]\n", rc); - /* * Clean the following indirect addresses for all functions since it * is not used by the driver. @@ -14020,8 +14004,6 @@ init_one_freemem: bnx2x_free_mem_bp(bp); init_one_exit: - bnx2x_disable_pcie_error_reporting(bp); - if (bp->regview) iounmap(bp->regview); @@ -14102,7 +14084,6 @@ static void __bnx2x_remove(struct pci_dev *pdev, pci_set_power_state(pdev, PCI_D3hot); } - bnx2x_disable_pcie_error_reporting(bp); if (remove_netdev) { if (bp->regview) iounmap(bp->regview); -- GitLab From 5f29b73d4eba2926ab99d7bf5f2028810af3c66c Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 12:19:16 -0600 Subject: [PATCH 0262/2078] bnxt: Drop redundant pci_enable_pcie_error_reporting() pci_enable_pcie_error_reporting() enables the device to send ERR_* Messages. Since f26e58bf6f54 ("PCI/AER: Enable error reporting when AER is native"), the PCI core does this for all devices during enumeration. Remove the redundant pci_enable_pcie_error_reporting() call from the driver. Also remove the corresponding pci_disable_pcie_error_reporting() from the driver .remove() path. Note that this only controls ERR_* Messages from the device. An ERR_* Message may cause the Root Port to generate an interrupt, depending on the AER Root Error Command register managed by the AER service driver. 
Signed-off-by: Bjorn Helgaas Reviewed-by: Michael Chan Acked-by: Jesse Brandeburg Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 5d4b1f2ebeac7..7245fee13ad05 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -48,7 +48,6 @@ #include #include #include -#include #include #include #include @@ -12706,8 +12705,6 @@ static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev) goto init_err_release; } - pci_enable_pcie_error_reporting(pdev); - INIT_WORK(&bp->sp_task, bnxt_sp_task); INIT_DELAYED_WORK(&bp->fw_reset_task, bnxt_fw_reset_task); @@ -13187,7 +13184,6 @@ static void bnxt_remove_one(struct pci_dev *pdev) bnxt_rdma_aux_device_uninit(bp); bnxt_ptp_clear(bp); - pci_disable_pcie_error_reporting(pdev); unregister_netdev(dev); clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); /* Flush any pending tasks */ -- GitLab From ca7f175fc24eea4638f93a4a49d229c34ae0c770 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 12:19:17 -0600 Subject: [PATCH 0263/2078] cxgb4: Drop redundant pci_enable_pcie_error_reporting() pci_enable_pcie_error_reporting() enables the device to send ERR_* Messages. Since f26e58bf6f54 ("PCI/AER: Enable error reporting when AER is native"), the PCI core does this for all devices during enumeration, so the driver doesn't need to do it itself. Remove the redundant pci_enable_pcie_error_reporting() call from the driver. Also remove the corresponding pci_disable_pcie_error_reporting() from the driver .remove() path. Note that this only controls ERR_* Messages from the device. An ERR_* Message may cause the Root Port to generate an interrupt, depending on the AER Root Error Command register managed by the AER service driver. Signed-off-by: Bjorn Helgaas Cc: Raju Rangoju Acked-by: Jesse Brandeburg Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 7db2403c4c9c9..f0bc7396ce2b1 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -51,7 +51,6 @@ #include #include #include -#include #include #include #include @@ -6687,7 +6686,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) goto out_free_adapter; } - pci_enable_pcie_error_reporting(pdev); pci_set_master(pdev); pci_save_state(pdev); adap_idx++; @@ -7092,7 +7090,6 @@ fw_attach_fail: out_unmap_bar0: iounmap(regs); out_disable_device: - pci_disable_pcie_error_reporting(pdev); pci_disable_device(pdev); out_release_regions: pci_release_regions(pdev); @@ -7171,7 +7168,6 @@ static void remove_one(struct pci_dev *pdev) } #endif iounmap(adapter->regs); - pci_disable_pcie_error_reporting(pdev); if ((adapter->flags & CXGB4_DEV_ENABLED)) { pci_disable_device(pdev); adapter->flags &= ~CXGB4_DEV_ENABLED; -- GitLab From 49f79ac22f89476cb2eac018676490e33a7c4a10 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 12:19:18 -0600 Subject: [PATCH 0264/2078] net/fungible: Drop redundant pci_enable_pcie_error_reporting() pci_enable_pcie_error_reporting() enables the device to send ERR_* Messages. 
Since f26e58bf6f54 ("PCI/AER: Enable error reporting when AER is native"), the PCI core does this for all devices during enumeration, so the driver doesn't need to do it itself. Remove the redundant pci_enable_pcie_error_reporting() call from the driver. Also remove the corresponding pci_disable_pcie_error_reporting() from the driver .remove() path. Note that this only controls ERR_* Messages from the device. An ERR_* Message may cause the Root Port to generate an interrupt, depending on the AER Root Error Command register managed by the AER service driver. Signed-off-by: Bjorn Helgaas Acked-by: Dimitris Michailidis Acked-by: Jesse Brandeburg Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/fungible/funcore/fun_dev.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/ethernet/fungible/funcore/fun_dev.c b/drivers/net/ethernet/fungible/funcore/fun_dev.c index fb5120d90f26a..3680f83feba23 100644 --- a/drivers/net/ethernet/fungible/funcore/fun_dev.c +++ b/drivers/net/ethernet/fungible/funcore/fun_dev.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) -#include #include #include #include @@ -748,7 +747,6 @@ void fun_dev_disable(struct fun_dev *fdev) pci_free_irq_vectors(pdev); pci_clear_master(pdev); - pci_disable_pcie_error_reporting(pdev); pci_disable_device(pdev); fun_unmap_bars(fdev); @@ -781,8 +779,6 @@ int fun_dev_enable(struct fun_dev *fdev, struct pci_dev *pdev, goto unmap; } - pci_enable_pcie_error_reporting(pdev); - rc = sanitize_dev(fdev); if (rc) goto disable_dev; @@ -830,7 +826,6 @@ free_irq_mgr: free_irqs: pci_free_irq_vectors(pdev); disable_dev: - pci_disable_pcie_error_reporting(pdev); pci_disable_device(pdev); unmap: fun_unmap_bars(fdev); -- GitLab From c183033f631a9265e5f85827b1cfcdb8d5a7ee35 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 12:19:19 -0600 Subject: [PATCH 0265/2078] net: hns3: remove unnecessary aer.h include is unused, so remove it. Signed-off-by: Bjorn Helgaas Cc: Yisen Zhuang Cc: Salil Mehta Acked-by: Jesse Brandeburg Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 25be7f8ac7cd5..5caea154362f7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include -- GitLab From 2d0e0372069d3050479d62d48f3b04445f60083a Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 12:19:20 -0600 Subject: [PATCH 0266/2078] netxen_nic: Drop redundant pci_enable_pcie_error_reporting() pci_enable_pcie_error_reporting() enables the device to send ERR_* Messages. Since f26e58bf6f54 ("PCI/AER: Enable error reporting when AER is native"), the PCI core does this for all devices during enumeration, so the driver doesn't need to do it itself. Remove the redundant pci_enable_pcie_error_reporting() call from the driver. Also remove the corresponding pci_disable_pcie_error_reporting() from the driver .remove() path. Note that this only controls ERR_* Messages from the device. An ERR_* Message may cause the Root Port to generate an interrupt, depending on the AER Root Error Command register managed by the AER service driver. Also note that the driver only called these for NX_IS_REVISION_P3 devices, so since f26e58bf6f54, error reporting has been enabled for devices other than NX_IS_REVISION_P3. 
Signed-off-by: Bjorn Helgaas Cc: Manish Chopra Cc: Rahul Verma Cc: GR-Linux-NIC-Dev@marvell.com Acked-by: Jesse Brandeburg Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index de8d54b23f738..59d0dd862fd12 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -18,7 +18,6 @@ #include #include #include -#include MODULE_DESCRIPTION("QLogic/NetXen (1/10) GbE Intelligent Ethernet Driver"); MODULE_LICENSE("GPL"); @@ -1464,9 +1463,6 @@ netxen_nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if ((err = pci_request_regions(pdev, netxen_nic_driver_name))) goto err_out_disable_pdev; - if (NX_IS_REVISION_P3(pdev->revision)) - pci_enable_pcie_error_reporting(pdev); - pci_set_master(pdev); netdev = alloc_etherdev(sizeof(struct netxen_adapter)); @@ -1603,8 +1599,6 @@ err_out_free_netdev: free_netdev(netdev); err_out_free_res: - if (NX_IS_REVISION_P3(pdev->revision)) - pci_disable_pcie_error_reporting(pdev); pci_release_regions(pdev); err_out_disable_pdev: @@ -1659,10 +1653,8 @@ static void netxen_nic_remove(struct pci_dev *pdev) netxen_release_firmware(adapter); - if (NX_IS_REVISION_P3(pdev->revision)) { + if (NX_IS_REVISION_P3(pdev->revision)) netxen_cleanup_minidump(adapter); - pci_disable_pcie_error_reporting(pdev); - } pci_release_regions(pdev); pci_disable_device(pdev); -- GitLab From fe3f4c292da1908eda5d322e60b3c09ed9508288 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 12:19:21 -0600 Subject: [PATCH 0267/2078] octeon_ep: Drop redundant pci_enable_pcie_error_reporting() pci_enable_pcie_error_reporting() enables the device to send ERR_* Messages. Since f26e58bf6f54 ("PCI/AER: Enable error reporting when AER is native"), the PCI core does this for all devices during enumeration, so the driver doesn't need to do it itself. Remove the redundant pci_enable_pcie_error_reporting() call from the driver. Also remove the corresponding pci_disable_pcie_error_reporting() from the driver .remove() path. Note that this only controls ERR_* Messages from the device. An ERR_* Message may cause the Root Port to generate an interrupt, depending on the AER Root Error Command register managed by the AER service driver. 
Signed-off-by: Bjorn Helgaas Cc: Veerasenareddy Burru Cc: Abhijit Ayarekar Acked-by: Jesse Brandeburg Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeon_ep/octep_main.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index 5a898fb88e375..fdce78ceea87e 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -1050,7 +1049,6 @@ static int octep_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_pci_regions; } - pci_enable_pcie_error_reporting(pdev); pci_set_master(pdev); netdev = alloc_etherdev_mq(sizeof(struct octep_device), @@ -1106,7 +1104,6 @@ register_dev_err: err_octep_config: free_netdev(netdev); err_alloc_netdev: - pci_disable_pcie_error_reporting(pdev); pci_release_mem_regions(pdev); err_pci_regions: err_dma_mask: @@ -1139,7 +1136,6 @@ static void octep_remove(struct pci_dev *pdev) octep_device_cleanup(oct); pci_release_mem_regions(pdev); free_netdev(netdev); - pci_disable_pcie_error_reporting(pdev); pci_disable_device(pdev); } -- GitLab From 1263c7b78315f6682a106d56e9878b3a7b176660 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 12:19:22 -0600 Subject: [PATCH 0268/2078] qed: Drop redundant pci_enable_pcie_error_reporting() pci_enable_pcie_error_reporting() enables the device to send ERR_* Messages. Since f26e58bf6f54 ("PCI/AER: Enable error reporting when AER is native"), the PCI core does this for all devices during enumeration, so the driver doesn't need to do it itself. Remove the redundant pci_enable_pcie_error_reporting() call from the driver. Also remove the corresponding pci_disable_pcie_error_reporting() from the driver .remove() path. Note that this only controls ERR_* Messages from the device. An ERR_* Message may cause the Root Port to generate an interrupt, depending on the AER Root Error Command register managed by the AER service driver. Signed-off-by: Bjorn Helgaas Cc: Ariel Elior Cc: Manish Chopra Acked-by: Jesse Brandeburg Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qed/qed_main.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index c91898be7c030..f5af83342856f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include "qed.h" @@ -259,8 +258,6 @@ static void qed_free_pci(struct qed_dev *cdev) { struct pci_dev *pdev = cdev->pdev; - pci_disable_pcie_error_reporting(pdev); - if (cdev->doorbells && cdev->db_size) iounmap(cdev->doorbells); if (cdev->regview) @@ -366,12 +363,6 @@ static int qed_init_pci(struct qed_dev *cdev, struct pci_dev *pdev) return -ENOMEM; } - /* AER (Advanced Error reporting) configuration */ - rc = pci_enable_pcie_error_reporting(pdev); - if (rc) - DP_VERBOSE(cdev, NETIF_MSG_DRV, - "Failed to configure PCIe AER [%d]\n", rc); - return 0; err2: -- GitLab From 5f1fbdc168f4bf9705b81a8844d0f8ca201a7363 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 12:19:23 -0600 Subject: [PATCH 0269/2078] net: qede: Remove unnecessary aer.h include is unused, so remove it. 
Signed-off-by: Bjorn Helgaas Cc: Ariel Elior Cc: Manish Chopra Acked-by: Jesse Brandeburg Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qede/qede_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 261f982ca40da..4c6c685820e3f 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -35,7 +35,6 @@ #include #include #include -#include #include "qede.h" #include "qede_ptp.h" -- GitLab From 95e35f5994075090ed086ad3bcef33878218784e Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 12:19:24 -0600 Subject: [PATCH 0270/2078] qlcnic: Drop redundant pci_enable_pcie_error_reporting() pci_enable_pcie_error_reporting() enables the device to send ERR_* Messages. Since f26e58bf6f54 ("PCI/AER: Enable error reporting when AER is native"), the PCI core does this for all devices during enumeration, so the driver doesn't need to do it itself. Remove the redundant pci_enable_pcie_error_reporting() call from the driver. Also remove the corresponding pci_disable_pcie_error_reporting() from the driver .remove() path. Note that this only controls ERR_* Messages from the device. An ERR_* Message may cause the Root Port to generate an interrupt, depending on the AER Root Error Command register managed by the AER service driver. Signed-off-by: Bjorn Helgaas Cc: Shahed Shaikh Cc: Manish Chopra Cc: GR-Linux-NIC-Dev@marvell.com Acked-by: Jesse Brandeburg Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 4 ---- drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c | 1 - 2 files changed, 5 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 44dac3c0908eb..90df4a0909fa2 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include @@ -2445,7 +2444,6 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_disable_pdev; pci_set_master(pdev); - pci_enable_pcie_error_reporting(pdev); ahw = kzalloc(sizeof(struct qlcnic_hardware_context), GFP_KERNEL); if (!ahw) { @@ -2675,7 +2673,6 @@ err_out_free_hw_res: kfree(ahw); err_out_free_res: - pci_disable_pcie_error_reporting(pdev); pci_release_regions(pdev); err_out_disable_pdev: @@ -2757,7 +2754,6 @@ static void qlcnic_remove(struct pci_dev *pdev) qlcnic_release_firmware(adapter); - pci_disable_pcie_error_reporting(pdev); pci_release_regions(pdev); pci_disable_device(pdev); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c index 5c2edb715d3e4..74125188beb82 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #ifdef CONFIG_QLCNIC_HWMON #include -- GitLab From e07ce5567194dd14f4fcbbaac4a1c48bb97da8e3 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 12:19:25 -0600 Subject: [PATCH 0271/2078] qlcnic: Remove unnecessary aer.h include is unused, so remove it. 
Signed-off-by: Bjorn Helgaas Cc: Shahed Shaikh Cc: Manish Chopra Cc: GR-Linux-NIC-Dev@marvell.com Acked-by: Jesse Brandeburg Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index 2fd5c6fdb5003..bcef8ab715bfa 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -8,7 +8,6 @@ #include #include #include -#include #include "qlcnic.h" #include "qlcnic_sriov.h" -- GitLab From bdedf705688c7704f2efc3ce4930a32612e4efdc Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 12:19:26 -0600 Subject: [PATCH 0272/2078] sfc: Drop redundant pci_enable_pcie_error_reporting() pci_enable_pcie_error_reporting() enables the device to send ERR_* Messages. Since f26e58bf6f54 ("PCI/AER: Enable error reporting when AER is native"), the PCI core does this for all devices during enumeration, so the driver doesn't need to do it itself. Remove the redundant pci_enable_pcie_error_reporting() call from the driver. Also remove the corresponding pci_disable_pcie_error_reporting() from the driver .remove() path. Note that this only controls ERR_* Messages from the device. An ERR_* Message may cause the Root Port to generate an interrupt, depending on the AER Root Error Command register managed by the AER service driver. Signed-off-by: Bjorn Helgaas Cc: Martin Habets Acked-by: Edward Cree Acked-by: Jesse Brandeburg Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/efx.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 02c2adeb0a120..92c390ec4735d 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include "net_driver.h" #include @@ -892,8 +891,6 @@ static void efx_pci_remove(struct pci_dev *pci_dev) free_netdev(efx->net_dev); probe_data = container_of(efx, struct efx_probe_data, efx); kfree(probe_data); - - pci_disable_pcie_error_reporting(pci_dev); }; /* NIC VPD information @@ -1126,8 +1123,6 @@ static int efx_pci_probe(struct pci_dev *pci_dev, netif_warn(efx, probe, efx->net_dev, "failed to create MTDs (%d)\n", rc); - (void)pci_enable_pcie_error_reporting(pci_dev); - if (efx->type->udp_tnl_push_ports) efx->type->udp_tnl_push_ports(efx); -- GitLab From 4ac9272691a402d5e862be627bac54d2cd6dacbc Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 12:19:27 -0600 Subject: [PATCH 0273/2078] sfc: falcon: Drop redundant pci_enable_pcie_error_reporting() pci_enable_pcie_error_reporting() enables the device to send ERR_* Messages. Since f26e58bf6f54 ("PCI/AER: Enable error reporting when AER is native"), the PCI core does this for all devices during enumeration, so the driver doesn't need to do it itself. Remove the redundant pci_enable_pcie_error_reporting() call from the driver. Also remove the corresponding pci_disable_pcie_error_reporting() from the driver .remove() path. Note that this only controls ERR_* Messages from the device. An ERR_* Message may cause the Root Port to generate an interrupt, depending on the AER Root Error Command register managed by the AER service driver. 
Signed-off-by: Bjorn Helgaas Cc: Martin Habets Acked-by: Edward Cree Acked-by: Jesse Brandeburg Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/falcon/efx.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c index e151b09577517..e001f27085c66 100644 --- a/drivers/net/ethernet/sfc/falcon/efx.c +++ b/drivers/net/ethernet/sfc/falcon/efx.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include "net_driver.h" #include "efx.h" @@ -2765,8 +2764,6 @@ static void ef4_pci_remove(struct pci_dev *pci_dev) ef4_fini_struct(efx); free_netdev(efx->net_dev); - - pci_disable_pcie_error_reporting(pci_dev); }; /* NIC VPD information @@ -2927,12 +2924,6 @@ static int ef4_pci_probe(struct pci_dev *pci_dev, netif_warn(efx, probe, efx->net_dev, "failed to create MTDs (%d)\n", rc); - rc = pci_enable_pcie_error_reporting(pci_dev); - if (rc && rc != -EINVAL) - netif_notice(efx, probe, efx->net_dev, - "PCIE error reporting unavailable (%d).\n", - rc); - return 0; fail4: -- GitLab From ecded61ceb89880f0b0490a168d18c9845b60eb3 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 12:19:28 -0600 Subject: [PATCH 0274/2078] sfc/siena: Drop redundant pci_enable_pcie_error_reporting() pci_enable_pcie_error_reporting() enables the device to send ERR_* Messages. Since f26e58bf6f54 ("PCI/AER: Enable error reporting when AER is native"), the PCI core does this for all devices during enumeration, so the driver doesn't need to do it itself. Remove the redundant pci_enable_pcie_error_reporting() call from the driver. Also remove the corresponding pci_disable_pcie_error_reporting() from the driver .remove() path. Note that this only controls ERR_* Messages from the device. An ERR_* Message may cause the Root Port to generate an interrupt, depending on the AER Root Error Command register managed by the AER service driver. Signed-off-by: Bjorn Helgaas Cc: Martin Habets Acked-by: Edward Cree Acked-by: Jesse Brandeburg Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/siena/efx.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/ethernet/sfc/siena/efx.c b/drivers/net/ethernet/sfc/siena/efx.c index ef52ec71d197f..8c557f6a183c9 100644 --- a/drivers/net/ethernet/sfc/siena/efx.c +++ b/drivers/net/ethernet/sfc/siena/efx.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include "net_driver.h" #include @@ -874,8 +873,6 @@ static void efx_pci_remove(struct pci_dev *pci_dev) efx_siena_fini_struct(efx); free_netdev(efx->net_dev); - - pci_disable_pcie_error_reporting(pci_dev); }; /* NIC VPD information @@ -1094,8 +1091,6 @@ static int efx_pci_probe(struct pci_dev *pci_dev, netif_warn(efx, probe, efx->net_dev, "failed to create MTDs (%d)\n", rc); - (void)pci_enable_pcie_error_reporting(pci_dev); - if (efx->type->udp_tnl_push_ports) efx->type->udp_tnl_push_ports(efx); -- GitLab From c39abdd396bc2b299798e8422538806a8dc29d20 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 12:19:29 -0600 Subject: [PATCH 0275/2078] sfc_ef100: Drop redundant pci_disable_pcie_error_reporting() 51b35a454efd ("sfc: skeleton EF100 PF driver") added a call to pci_disable_pcie_error_reporting() in ef100_pci_remove(). Remove this call since there's no apparent reason to disable error reporting when it was not previously enabled. 
Note that since f26e58bf6f54 ("PCI/AER: Enable error reporting when AER is native"), the PCI core enables PCIe error reporting for all devices during enumeration, so the driver doesn't need to do it itself. Signed-off-by: Bjorn Helgaas Cc: Martin Habets Acked-by: Edward Cree Acked-by: Jesse Brandeburg Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/ef100.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef100.c b/drivers/net/ethernet/sfc/ef100.c index 71aab3d0480fe..6334992b0af4f 100644 --- a/drivers/net/ethernet/sfc/ef100.c +++ b/drivers/net/ethernet/sfc/ef100.c @@ -11,7 +11,6 @@ #include "net_driver.h" #include -#include #include "efx_common.h" #include "efx_channels.h" #include "io.h" @@ -440,8 +439,6 @@ static void ef100_pci_remove(struct pci_dev *pci_dev) pci_dbg(pci_dev, "shutdown successful\n"); - pci_disable_pcie_error_reporting(pci_dev); - pci_set_drvdata(pci_dev, NULL); efx_fini_struct(efx); kfree(probe_data); -- GitLab From a7edf8e5142f2037db944ec87c0f2491dadfe2a6 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 12:19:30 -0600 Subject: [PATCH 0276/2078] net: ngbe: Drop redundant pci_enable_pcie_error_reporting() pci_enable_pcie_error_reporting() enables the device to send ERR_* Messages. Since f26e58bf6f54 ("PCI/AER: Enable error reporting when AER is native"), the PCI core does this for all devices during enumeration, so the driver doesn't need to do it itself. Remove the redundant pci_enable_pcie_error_reporting() call from the driver. Also remove the corresponding pci_disable_pcie_error_reporting() from the driver .remove() path. Note that this only controls ERR_* Messages from the device. An ERR_* Message may cause the Root Port to generate an interrupt, depending on the AER Root Error Command register managed by the AER service driver. Signed-off-by: Bjorn Helgaas Cc: Jiawen Wu Cc: Mengyuan Lou Acked-by: Jesse Brandeburg Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/ngbe/ngbe_main.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c index 5b564d348c091..0e4163e1106f1 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -520,7 +519,6 @@ static int ngbe_probe(struct pci_dev *pdev, goto err_pci_disable_dev; } - pci_enable_pcie_error_reporting(pdev); pci_set_master(pdev); netdev = devm_alloc_etherdev_mqs(&pdev->dev, @@ -669,7 +667,6 @@ err_clear_interrupt_scheme: err_free_mac_table: kfree(wx->mac_table); err_pci_release_regions: - pci_disable_pcie_error_reporting(pdev); pci_release_selected_regions(pdev, pci_select_bars(pdev, IORESOURCE_MEM)); err_pci_disable_dev: @@ -698,7 +695,6 @@ static void ngbe_remove(struct pci_dev *pdev) kfree(wx->mac_table); wx_clear_interrupt_scheme(wx); - pci_disable_pcie_error_reporting(pdev); pci_disable_device(pdev); } -- GitLab From 1fccc781bf7e32b89ecb289987670b040f97d7c0 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 12:19:31 -0600 Subject: [PATCH 0277/2078] net: txgbe: Drop redundant pci_enable_pcie_error_reporting() pci_enable_pcie_error_reporting() enables the device to send ERR_* Messages. Since f26e58bf6f54 ("PCI/AER: Enable error reporting when AER is native"), the PCI core does this for all devices during enumeration, so the driver doesn't need to do it itself. 
Remove the redundant pci_enable_pcie_error_reporting() call from the driver. Also remove the corresponding pci_disable_pcie_error_reporting() from the driver .remove() path. Note that this only controls ERR_* Messages from the device. An ERR_* Message may cause the Root Port to generate an interrupt, depending on the AER Root Error Command register managed by the AER service driver. Signed-off-by: Bjorn Helgaas Cc: Jiawen Wu Cc: Mengyuan Lou Acked-by: Jesse Brandeburg Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/txgbe/txgbe_main.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index 6c0a982305576..859feaafd3500 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include @@ -538,7 +537,6 @@ static int txgbe_probe(struct pci_dev *pdev, goto err_pci_disable_dev; } - pci_enable_pcie_error_reporting(pdev); pci_set_master(pdev); netdev = devm_alloc_etherdev_mqs(&pdev->dev, @@ -698,7 +696,6 @@ err_release_hw: err_free_mac_table: kfree(wx->mac_table); err_pci_release_regions: - pci_disable_pcie_error_reporting(pdev); pci_release_selected_regions(pdev, pci_select_bars(pdev, IORESOURCE_MEM)); err_pci_disable_dev: @@ -729,8 +726,6 @@ static void txgbe_remove(struct pci_dev *pdev) kfree(wx->mac_table); wx_clear_interrupt_scheme(wx); - pci_disable_pcie_error_reporting(pdev); - pci_disable_device(pdev); } -- GitLab From ab76f2bff0f3d165428fb71532c7e6ba81df8317 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 12:19:32 -0600 Subject: [PATCH 0278/2078] e1000e: Remove unnecessary aer.h include is unused, so remove it. Signed-off-by: Bjorn Helgaas Cc: Jesse Brandeburg Cc: Tony Nguyen Cc: intel-wired-lan@lists.osuosl.org Acked-by: Jesse Brandeburg Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/e1000e/netdev.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index e1eb1de88bf92..6f5c16aebcbf0 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include -- GitLab From 8be901a6715f290131ca919bef425717538382b5 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 12:19:33 -0600 Subject: [PATCH 0279/2078] fm10k: Remove unnecessary aer.h include is unused, so remove it. Signed-off-by: Bjorn Helgaas Cc: Jesse Brandeburg Cc: Tony Nguyen Acked-by: Jesse Brandeburg Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index 027d721feb189..d748b98274e79 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -3,7 +3,6 @@ #include #include -#include #include "fm10k.h" -- GitLab From acd2bb015fae8d67ff11d11dfe250b0fb6c6081c Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 12:19:34 -0600 Subject: [PATCH 0280/2078] i40e: Remove unnecessary aer.h include is unused, so remove it. 
Signed-off-by: Bjorn Helgaas Cc: Jesse Brandeburg Cc: Tony Nguyen Acked-by: Jesse Brandeburg Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 60ce4d15d82a9..6e310a5394678 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include -- GitLab From 495b72c79302bb827ff14686000edbba1dd9e372 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 12:19:35 -0600 Subject: [PATCH 0281/2078] iavf: Remove unnecessary aer.h include is unused, so remove it. Signed-off-by: Bjorn Helgaas Cc: Jesse Brandeburg Cc: Tony Nguyen Acked-by: Jesse Brandeburg Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/iavf/iavf.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 232bc61d9eee9..2cdce251472c0 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -6,7 +6,6 @@ #include #include -#include #include #include #include -- GitLab From ddd652ef30e396efe006494a67e43e174099b419 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 12:19:36 -0600 Subject: [PATCH 0282/2078] ice: Remove unnecessary aer.h include is unused, so remove it. Signed-off-by: Bjorn Helgaas Cc: Jesse Brandeburg Cc: Tony Nguyen Acked-by: Jesse Brandeburg Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index b0e29e3424018..d79a48d27f1a8 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include -- GitLab From 648a2020fdac17591b909849513a10a65daa8b27 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 12:19:37 -0600 Subject: [PATCH 0283/2078] igb: Remove unnecessary aer.h include is unused, so remove it. Signed-off-by: Bjorn Helgaas Cc: Jesse Brandeburg Cc: Tony Nguyen Acked-by: Jesse Brandeburg Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/igb/igb_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 03bc1e8af575f..a2914298dd692 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include -- GitLab From 1530522f101f5f58f73bb3c9f5b1be39cb1c2a62 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 12:19:38 -0600 Subject: [PATCH 0284/2078] igc: Remove unnecessary aer.h include is unused, so remove it. 
Signed-off-by: Bjorn Helgaas Cc: Jesse Brandeburg Cc: Tony Nguyen Acked-by: Jesse Brandeburg Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/igc/igc_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 2928a6c736928..d7ee06d28b505 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include -- GitLab From f3468e3944398c4de9b2deff73041394fba632e7 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 12:19:39 -0600 Subject: [PATCH 0285/2078] ixgbe: Remove unnecessary aer.h include is unused, so remove it. Signed-off-by: Bjorn Helgaas Cc: Jesse Brandeburg Cc: Tony Nguyen Acked-by: Jesse Brandeburg Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 8736ca4b26289..63d4e32df0295 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include -- GitLab From 04eb3d1cede014defaa8c259263439cc7e9ceda6 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 7 Mar 2023 16:19:26 +0000 Subject: [PATCH 0286/2078] net: mtk_eth_soc: tidy mtk_gmac0_rgmii_adjust() Get rid of the multiple tenary operators in mtk_gmac0_rgmii_adjust() replacing them with a single if(), thus making the code easier to read. Signed-off-by: Russell King (Oracle) Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 34 ++++++++++++--------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 14be6ea51b889..c63f17929ccf4 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -397,38 +397,42 @@ static int mt7621_gmac0_rgmii_adjust(struct mtk_eth *eth, static void mtk_gmac0_rgmii_adjust(struct mtk_eth *eth, phy_interface_t interface, int speed) { - u32 val; + unsigned long rate; + u32 tck, rck, intf; int ret; if (interface == PHY_INTERFACE_MODE_TRGMII) { mtk_w32(eth, TRGMII_MODE, INTF_MODE); - val = 500000000; - ret = clk_set_rate(eth->clks[MTK_CLK_TRGPLL], val); + ret = clk_set_rate(eth->clks[MTK_CLK_TRGPLL], 500000000); if (ret) dev_err(eth->dev, "Failed to set trgmii pll: %d\n", ret); return; } - val = (speed == SPEED_1000) ? - INTF_MODE_RGMII_1000 : INTF_MODE_RGMII_10_100; - mtk_w32(eth, val, INTF_MODE); + if (speed == SPEED_1000) { + intf = INTF_MODE_RGMII_1000; + rate = 250000000; + rck = RCK_CTRL_RGMII_1000; + tck = TCK_CTRL_RGMII_1000; + } else { + intf = INTF_MODE_RGMII_10_100; + rate = 500000000; + rck = RCK_CTRL_RGMII_10_100; + tck = TCK_CTRL_RGMII_10_100; + } + + mtk_w32(eth, intf, INTF_MODE); regmap_update_bits(eth->ethsys, ETHSYS_CLKCFG0, ETHSYS_TRGMII_CLK_SEL362_5, ETHSYS_TRGMII_CLK_SEL362_5); - val = (speed == SPEED_1000) ? 250000000 : 500000000; - ret = clk_set_rate(eth->clks[MTK_CLK_TRGPLL], val); + ret = clk_set_rate(eth->clks[MTK_CLK_TRGPLL], rate); if (ret) dev_err(eth->dev, "Failed to set trgmii pll: %d\n", ret); - val = (speed == SPEED_1000) ? - RCK_CTRL_RGMII_1000 : RCK_CTRL_RGMII_10_100; - mtk_w32(eth, val, TRGMII_RCK_CTRL); - - val = (speed == SPEED_1000) ? 
- TCK_CTRL_RGMII_1000 : TCK_CTRL_RGMII_10_100; - mtk_w32(eth, val, TRGMII_TCK_CTRL); + mtk_w32(eth, rck, TRGMII_RCK_CTRL); + mtk_w32(eth, tck, TRGMII_TCK_CTRL); } static struct phylink_pcs *mtk_mac_select_pcs(struct phylink_config *config, -- GitLab From 7910898e1b2ae8ba0646434294931bf69cc0fdb5 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 7 Mar 2023 16:19:31 +0000 Subject: [PATCH 0287/2078] net: mtk_eth_soc: move trgmii ddr2 check to probe function If TRGMII mode is not permitted when using DDR2 mode, we should handle that when setting up phylink's ->supported_interfaces so phylink knows that this is not supported by the hardware. Move this check to mtk_add_mac(). Signed-off-by: Russell King (Oracle) Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index c63f17929ccf4..1b385dfe620f3 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -374,17 +374,6 @@ static int mt7621_gmac0_rgmii_adjust(struct mtk_eth *eth, { u32 val; - /* Check DDR memory type. - * Currently TRGMII mode with DDR2 memory is not supported. - */ - regmap_read(eth->ethsys, ETHSYS_SYSCFG, &val); - if (interface == PHY_INTERFACE_MODE_TRGMII && - val & SYSCFG_DRAM_TYPE_DDR2) { - dev_err(eth->dev, - "TRGMII mode with DDR2 memory is not supported!\n"); - return -EOPNOTSUPP; - } - val = (interface == PHY_INTERFACE_MODE_TRGMII) ? ETHSYS_TRGMII_MT7621_DDR_PLL : 0; @@ -4333,6 +4322,7 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) struct mtk_mac *mac; int id, err; int txqs = 1; + u32 val; if (!_id) { dev_err(eth->dev, "missing mac id\n"); @@ -4409,6 +4399,15 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) __set_bit(PHY_INTERFACE_MODE_TRGMII, mac->phylink_config.supported_interfaces); + /* TRGMII is not permitted on MT7621 if using DDR2 */ + if (MTK_HAS_CAPS(mac->hw->soc->caps, MTK_GMAC1_TRGMII) && + MTK_HAS_CAPS(mac->hw->soc->caps, MTK_TRGMII_MT7621_CLK)) { + regmap_read(eth->ethsys, ETHSYS_SYSCFG, &val); + if (val & SYSCFG_DRAM_TYPE_DDR2) + __clear_bit(PHY_INTERFACE_MODE_TRGMII, + mac->phylink_config.supported_interfaces); + } + if (MTK_HAS_CAPS(mac->hw->soc->caps, MTK_SGMII)) { __set_bit(PHY_INTERFACE_MODE_SGMII, mac->phylink_config.supported_interfaces); -- GitLab From c9f9e3a3289f2f068db45ea1219aa5b295762f44 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 7 Mar 2023 16:19:36 +0000 Subject: [PATCH 0288/2078] net: mtk_eth_soc: remove unnecessary checks in mtk_mac_config() mtk_mac_config() checks that the interface mode is permitted for the capabilities, but we already do these checks in mtk_add_mac() when initialising phylink's supported_interfaces bitmap. Remove the unnecessary tests. 
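To make the reasoning explicit, here is an abridged sketch of the probe-time setup done in mtk_add_mac() (condensed, not a verbatim excerpt; the capability macros are the ones already used in this file):

	if (MTK_HAS_CAPS(mac->hw->soc->caps, MTK_RGMII))
		__set_bit(PHY_INTERFACE_MODE_RGMII,
			  mac->phylink_config.supported_interfaces);

	if (MTK_HAS_CAPS(mac->hw->soc->caps, MTK_SGMII))
		__set_bit(PHY_INTERFACE_MODE_SGMII,
			  mac->phylink_config.supported_interfaces);

Because phylink only ever selects an interface mode that is set in supported_interfaces, mtk_mac_config() cannot be called with a mode the removed checks would have rejected.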
Signed-off-by: Russell King (Oracle) Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 1b385dfe620f3..f78810717f66f 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -458,12 +458,6 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode, /* Setup soc pin functions */ switch (state->interface) { case PHY_INTERFACE_MODE_TRGMII: - if (mac->id) - goto err_phy; - if (!MTK_HAS_CAPS(mac->hw->soc->caps, - MTK_GMAC1_TRGMII)) - goto err_phy; - fallthrough; case PHY_INTERFACE_MODE_RGMII_TXID: case PHY_INTERFACE_MODE_RGMII_RXID: case PHY_INTERFACE_MODE_RGMII_ID: @@ -480,11 +474,9 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode, case PHY_INTERFACE_MODE_1000BASEX: case PHY_INTERFACE_MODE_2500BASEX: case PHY_INTERFACE_MODE_SGMII: - if (MTK_HAS_CAPS(eth->soc->caps, MTK_SGMII)) { - err = mtk_gmac_sgmii_path_setup(eth, mac->id); - if (err) - goto init_err; - } + err = mtk_gmac_sgmii_path_setup(eth, mac->id); + if (err) + goto init_err; break; case PHY_INTERFACE_MODE_GMII: if (MTK_HAS_CAPS(eth->soc->caps, MTK_GEPHY)) { -- GitLab From 8cd9de08ccf6e1575d4994ba8347fa9ac083f900 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 7 Mar 2023 16:19:41 +0000 Subject: [PATCH 0289/2078] net: mtk_eth_soc: remove support for RMII and REVMII modes Since the conversion of mtk_eth_soc to phylink's supported_interfaces bitmap, these two modes have not been selectable. No one has raised this as an issue. Checking the in-kernel DT files, none of them use either of these modes with this hardware. Daniel Golle concurs: A quick grep through the device trees of the more than 650 ramips and mediatek boards we support in OpenWrt has revealed that *none* of them uses either reduced-MII or reverse-MII PHY modes. I could imaging that some more specialized ramips boards may use the RMII 100M PHY mode to connect with exotic PHYs for industrial or automotive applications (think: for 100BASE-T1 PHY connected via RMII). I have never seen or touched such boards, but there are hints that they do exist. For reverse-MII there are cases in which the Ralink SoC (Rt305x, for example) is used in iNIC mode, ie. connected as a PHY to another SoC, and running only a minimal firmware rather than running Linux. Due to the lack of external DRAM for the Ralink SoC on this kind of boards, the Ralink SoC there will anyway never be able to boot Linux. I've seen this e.g. in multimedia devices like early WiFi-connected not-yet-so-smart TVs. Consequently, the conclusion is that no one uses these modes with this hardware, so we might as well drop support for them. 
Signed-off-by: Russell King (Oracle) Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index f78810717f66f..b22cd160554ee 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -463,8 +463,6 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode, case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_MII: - case PHY_INTERFACE_MODE_REVMII: - case PHY_INTERFACE_MODE_RMII: if (MTK_HAS_CAPS(eth->soc->caps, MTK_RGMII)) { err = mtk_gmac_rgmii_path_setup(eth, mac->id); if (err) @@ -524,21 +522,13 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode, } } - ge_mode = 0; switch (state->interface) { case PHY_INTERFACE_MODE_MII: case PHY_INTERFACE_MODE_GMII: ge_mode = 1; break; - case PHY_INTERFACE_MODE_REVMII: - ge_mode = 2; - break; - case PHY_INTERFACE_MODE_RMII: - if (mac->id) - goto err_phy; - ge_mode = 3; - break; default: + ge_mode = 0; break; } -- GitLab From 4821a076eb602a6238528e9ebafeac853c833415 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 7 Mar 2023 16:23:26 -0500 Subject: [PATCH 0290/2078] sctp: add fair capacity stream scheduler As it says in rfc8260#section-3.5 about the fair capacity scheduler: A fair capacity distribution between the streams is used. This scheduler considers the lengths of the messages of each stream and schedules them in a specific way to maintain an equal capacity for all streams. The details are implementation dependent. interleaving user messages allows for a better realization of the fair capacity usage. This patch adds Fair Capacity Scheduler based on the foundations added by commit 5bbbbe32a431 ("sctp: introduce stream scheduler foundations"): A fc_list and a fc_length are added into struct sctp_stream_out_ext and a fc_list is added into struct sctp_stream. In .enqueue, when there are chunks enqueued into a stream, this stream will be linked into stream-> fc_list by its fc_list ordered by its fc_length. In .dequeue, it always picks up the 1st skb from stream->fc_list. In .dequeue_done, fc_length is increased by chunk's len and update its location in stream->fc_list according to the its new fc_length. Note that when the new fc_length overflows in .dequeue_done, instead of resetting all fc_lengths to 0, we only reduced them by U32_MAX / 4 to avoid a moment of imbalance in the scheduling, as Marcelo suggested. 
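For completeness, a small userspace sketch of how an application would opt in to the new scheduler. This patch only adds the SCTP_SS_FC value; selection is assumed to go through the pre-existing SCTP_STREAM_SCHEDULER socket option and struct sctp_assoc_value from <linux/sctp.h>, as with the other schedulers, and select_fair_capacity() is a hypothetical helper:

#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/sctp.h>

static int select_fair_capacity(int sctp_fd)
{
	struct sctp_assoc_value av = {
		.assoc_id = 0,			/* ignored on one-to-one style sockets */
		.assoc_value = SCTP_SS_FC,	/* scheduler added by this patch */
	};

	return setsockopt(sctp_fd, IPPROTO_SCTP, SCTP_STREAM_SCHEDULER,
			  &av, sizeof(av));
}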
Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Paolo Abeni --- include/net/sctp/stream_sched.h | 1 + include/net/sctp/structs.h | 7 ++ include/uapi/linux/sctp.h | 3 +- net/sctp/Makefile | 3 +- net/sctp/stream_sched.c | 1 + net/sctp/stream_sched_fc.c | 183 ++++++++++++++++++++++++++++++++ 6 files changed, 196 insertions(+), 2 deletions(-) create mode 100644 net/sctp/stream_sched_fc.c diff --git a/include/net/sctp/stream_sched.h b/include/net/sctp/stream_sched.h index fa00dc20a0d73..913170710adb6 100644 --- a/include/net/sctp/stream_sched.h +++ b/include/net/sctp/stream_sched.h @@ -58,5 +58,6 @@ void sctp_sched_ops_register(enum sctp_sched_type sched, struct sctp_sched_ops *sched_ops); void sctp_sched_ops_prio_init(void); void sctp_sched_ops_rr_init(void); +void sctp_sched_ops_fc_init(void); #endif /* __sctp_stream_sched_h__ */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index e1f6e7fc2b11e..2f1c9f50b3523 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1429,6 +1429,10 @@ struct sctp_stream_out_ext { struct { struct list_head rr_list; }; + struct { + struct list_head fc_list; + __u32 fc_length; + }; }; }; @@ -1475,6 +1479,9 @@ struct sctp_stream { /* The next stream in line */ struct sctp_stream_out_ext *rr_next; }; + struct { + struct list_head fc_list; + }; }; struct sctp_stream_interleave *si; }; diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index ed7d4ecbf53dd..6814c5a1c4bcf 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -1211,7 +1211,8 @@ enum sctp_sched_type { SCTP_SS_DEFAULT = SCTP_SS_FCFS, SCTP_SS_PRIO, SCTP_SS_RR, - SCTP_SS_MAX = SCTP_SS_RR + SCTP_SS_FC, + SCTP_SS_MAX = SCTP_SS_FC }; /* Probe Interval socket option */ diff --git a/net/sctp/Makefile b/net/sctp/Makefile index e845e45885352..0448398408d8a 100644 --- a/net/sctp/Makefile +++ b/net/sctp/Makefile @@ -13,7 +13,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ tsnmap.o bind_addr.o socket.o primitive.o \ output.o input.o debug.o stream.o auth.o \ offload.o stream_sched.o stream_sched_prio.o \ - stream_sched_rr.o stream_interleave.o + stream_sched_rr.o stream_sched_fc.o \ + stream_interleave.o sctp_diag-y := diag.o diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c index 330067002debe..1ebd14ef8daac 100644 --- a/net/sctp/stream_sched.c +++ b/net/sctp/stream_sched.c @@ -124,6 +124,7 @@ void sctp_sched_ops_init(void) sctp_sched_ops_fcfs_init(); sctp_sched_ops_prio_init(); sctp_sched_ops_rr_init(); + sctp_sched_ops_fc_init(); } static void sctp_sched_free_sched(struct sctp_stream *stream) diff --git a/net/sctp/stream_sched_fc.c b/net/sctp/stream_sched_fc.c new file mode 100644 index 0000000000000..b336c2f5486b3 --- /dev/null +++ b/net/sctp/stream_sched_fc.c @@ -0,0 +1,183 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* SCTP kernel implementation + * (C) Copyright Red Hat Inc. 2022 + * + * This file is part of the SCTP kernel implementation + * + * These functions manipulate sctp stream queue/scheduling. 
+ * + * Please send any bug reports or fixes you make to the + * email addresched(es): + * lksctp developers + * + * Written or modified by: + * Xin Long + */ + +#include +#include +#include +#include + +/* Fair Capacity handling + * RFC 8260 section 3.5 + */ +static void sctp_sched_fc_unsched_all(struct sctp_stream *stream); + +static int sctp_sched_fc_set(struct sctp_stream *stream, __u16 sid, + __u16 weight, gfp_t gfp) +{ + return 0; +} + +static int sctp_sched_fc_get(struct sctp_stream *stream, __u16 sid, + __u16 *value) +{ + return 0; +} + +static int sctp_sched_fc_init(struct sctp_stream *stream) +{ + INIT_LIST_HEAD(&stream->fc_list); + + return 0; +} + +static int sctp_sched_fc_init_sid(struct sctp_stream *stream, __u16 sid, + gfp_t gfp) +{ + struct sctp_stream_out_ext *soute = SCTP_SO(stream, sid)->ext; + + INIT_LIST_HEAD(&soute->fc_list); + soute->fc_length = 0; + + return 0; +} + +static void sctp_sched_fc_free_sid(struct sctp_stream *stream, __u16 sid) +{ +} + +static void sctp_sched_fc_sched(struct sctp_stream *stream, + struct sctp_stream_out_ext *soute) +{ + struct sctp_stream_out_ext *pos; + + if (!list_empty(&soute->fc_list)) + return; + + list_for_each_entry(pos, &stream->fc_list, fc_list) + if (pos->fc_length >= soute->fc_length) + break; + list_add_tail(&soute->fc_list, &pos->fc_list); +} + +static void sctp_sched_fc_enqueue(struct sctp_outq *q, + struct sctp_datamsg *msg) +{ + struct sctp_stream *stream; + struct sctp_chunk *ch; + __u16 sid; + + ch = list_first_entry(&msg->chunks, struct sctp_chunk, frag_list); + sid = sctp_chunk_stream_no(ch); + stream = &q->asoc->stream; + sctp_sched_fc_sched(stream, SCTP_SO(stream, sid)->ext); +} + +static struct sctp_chunk *sctp_sched_fc_dequeue(struct sctp_outq *q) +{ + struct sctp_stream *stream = &q->asoc->stream; + struct sctp_stream_out_ext *soute; + struct sctp_chunk *ch; + + /* Bail out quickly if queue is empty */ + if (list_empty(&q->out_chunk_list)) + return NULL; + + /* Find which chunk is next */ + if (stream->out_curr) + soute = stream->out_curr->ext; + else + soute = list_entry(stream->fc_list.next, struct sctp_stream_out_ext, fc_list); + ch = list_entry(soute->outq.next, struct sctp_chunk, stream_list); + + sctp_sched_dequeue_common(q, ch); + return ch; +} + +static void sctp_sched_fc_dequeue_done(struct sctp_outq *q, + struct sctp_chunk *ch) +{ + struct sctp_stream *stream = &q->asoc->stream; + struct sctp_stream_out_ext *soute, *pos; + __u16 sid, i; + + sid = sctp_chunk_stream_no(ch); + soute = SCTP_SO(stream, sid)->ext; + /* reduce all fc_lengths by U32_MAX / 4 if the current fc_length overflows. 
*/ + if (soute->fc_length > U32_MAX - ch->skb->len) { + for (i = 0; i < stream->outcnt; i++) { + pos = SCTP_SO(stream, i)->ext; + if (!pos) + continue; + if (pos->fc_length <= (U32_MAX >> 2)) { + pos->fc_length = 0; + continue; + } + pos->fc_length -= (U32_MAX >> 2); + } + } + soute->fc_length += ch->skb->len; + + if (list_empty(&soute->outq)) { + list_del_init(&soute->fc_list); + return; + } + + pos = soute; + list_for_each_entry_continue(pos, &stream->fc_list, fc_list) + if (pos->fc_length >= soute->fc_length) + break; + list_move_tail(&soute->fc_list, &pos->fc_list); +} + +static void sctp_sched_fc_sched_all(struct sctp_stream *stream) +{ + struct sctp_association *asoc; + struct sctp_chunk *ch; + + asoc = container_of(stream, struct sctp_association, stream); + list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) { + __u16 sid = sctp_chunk_stream_no(ch); + + if (SCTP_SO(stream, sid)->ext) + sctp_sched_fc_sched(stream, SCTP_SO(stream, sid)->ext); + } +} + +static void sctp_sched_fc_unsched_all(struct sctp_stream *stream) +{ + struct sctp_stream_out_ext *soute, *tmp; + + list_for_each_entry_safe(soute, tmp, &stream->fc_list, fc_list) + list_del_init(&soute->fc_list); +} + +static struct sctp_sched_ops sctp_sched_fc = { + .set = sctp_sched_fc_set, + .get = sctp_sched_fc_get, + .init = sctp_sched_fc_init, + .init_sid = sctp_sched_fc_init_sid, + .free_sid = sctp_sched_fc_free_sid, + .enqueue = sctp_sched_fc_enqueue, + .dequeue = sctp_sched_fc_dequeue, + .dequeue_done = sctp_sched_fc_dequeue_done, + .sched_all = sctp_sched_fc_sched_all, + .unsched_all = sctp_sched_fc_unsched_all, +}; + +void sctp_sched_ops_fc_init(void) +{ + sctp_sched_ops_register(SCTP_SS_FC, &sctp_sched_fc); +} -- GitLab From 42d452e7709fdb4d42376d2a97369e22cc80a5d2 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 7 Mar 2023 16:23:27 -0500 Subject: [PATCH 0291/2078] sctp: add weighted fair queueing stream scheduler As it says in rfc8260#section-3.6 about the weighted fair queueing scheduler: A Weighted Fair Queueing scheduler between the streams is used. The weight is configurable per outgoing SCTP stream. This scheduler considers the lengths of the messages of each stream and schedules them in a specific way to use the capacity according to the given weights. If the weight of stream S1 is n times the weight of stream S2, the scheduler should assign to stream S1 n times the capacity it assigns to stream S2. The details are implementation dependent. Interleaving user messages allows for a better realization of the capacity usage according to the given weights. This patch adds Weighted Fair Queueing Scheduler actually based on the code of Fair Capacity Scheduler by adding fc_weight into struct sctp_stream_out_ext and taking it into account when sorting stream-> fc_list in sctp_sched_fc_sched() and sctp_sched_fc_dequeue_done(). 
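As with the fair capacity scheduler, a brief userspace sketch of how a per-stream weight would be configured. It assumes the pre-existing SCTP_STREAM_SCHEDULER / SCTP_STREAM_SCHEDULER_VALUE socket options and struct sctp_stream_value from <linux/sctp.h>; set_stream_weight() is a hypothetical helper, not part of this patch:

#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/sctp.h>

static int set_stream_weight(int sctp_fd, __u16 sid, __u16 weight)
{
	struct sctp_assoc_value av = { .assoc_value = SCTP_SS_WFQ };
	struct sctp_stream_value sv = {
		.stream_id = sid,
		.stream_value = weight,	/* stored as fc_weight for this stream */
	};

	if (setsockopt(sctp_fd, IPPROTO_SCTP, SCTP_STREAM_SCHEDULER,
		       &av, sizeof(av)))
		return -1;

	return setsockopt(sctp_fd, IPPROTO_SCTP, SCTP_STREAM_SCHEDULER_VALUE,
			  &sv, sizeof(sv));
}

A stream with weight 2 is then given roughly twice the send capacity of a weight-1 stream, per the sorting keys used in sctp_sched_fc_sched() and sctp_sched_fc_dequeue_done().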
Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Paolo Abeni --- include/net/sctp/stream_sched.h | 1 + include/net/sctp/structs.h | 1 + include/uapi/linux/sctp.h | 3 +- net/sctp/stream_sched.c | 1 + net/sctp/stream_sched_fc.c | 50 ++++++++++++++++++++++++++++++--- 5 files changed, 51 insertions(+), 5 deletions(-) diff --git a/include/net/sctp/stream_sched.h b/include/net/sctp/stream_sched.h index 913170710adb6..572d73fdcd5ea 100644 --- a/include/net/sctp/stream_sched.h +++ b/include/net/sctp/stream_sched.h @@ -59,5 +59,6 @@ void sctp_sched_ops_register(enum sctp_sched_type sched, void sctp_sched_ops_prio_init(void); void sctp_sched_ops_rr_init(void); void sctp_sched_ops_fc_init(void); +void sctp_sched_ops_wfq_init(void); #endif /* __sctp_stream_sched_h__ */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 2f1c9f50b3523..a0933efd93c3e 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1432,6 +1432,7 @@ struct sctp_stream_out_ext { struct { struct list_head fc_list; __u32 fc_length; + __u16 fc_weight; }; }; }; diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 6814c5a1c4bcf..b7d91d4cf0db5 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -1212,7 +1212,8 @@ enum sctp_sched_type { SCTP_SS_PRIO, SCTP_SS_RR, SCTP_SS_FC, - SCTP_SS_MAX = SCTP_SS_FC + SCTP_SS_WFQ, + SCTP_SS_MAX = SCTP_SS_WFQ }; /* Probe Interval socket option */ diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c index 1ebd14ef8daac..e843760e9aaad 100644 --- a/net/sctp/stream_sched.c +++ b/net/sctp/stream_sched.c @@ -125,6 +125,7 @@ void sctp_sched_ops_init(void) sctp_sched_ops_prio_init(); sctp_sched_ops_rr_init(); sctp_sched_ops_fc_init(); + sctp_sched_ops_wfq_init(); } static void sctp_sched_free_sched(struct sctp_stream *stream) diff --git a/net/sctp/stream_sched_fc.c b/net/sctp/stream_sched_fc.c index b336c2f5486b3..4bd18a497a6dc 100644 --- a/net/sctp/stream_sched_fc.c +++ b/net/sctp/stream_sched_fc.c @@ -19,11 +19,32 @@ #include #include -/* Fair Capacity handling - * RFC 8260 section 3.5 +/* Fair Capacity and Weighted Fair Queueing handling + * RFC 8260 section 3.5 and 3.6 */ static void sctp_sched_fc_unsched_all(struct sctp_stream *stream); +static int sctp_sched_wfq_set(struct sctp_stream *stream, __u16 sid, + __u16 weight, gfp_t gfp) +{ + struct sctp_stream_out_ext *soute = SCTP_SO(stream, sid)->ext; + + if (!weight) + return -EINVAL; + + soute->fc_weight = weight; + return 0; +} + +static int sctp_sched_wfq_get(struct sctp_stream *stream, __u16 sid, + __u16 *value) +{ + struct sctp_stream_out_ext *soute = SCTP_SO(stream, sid)->ext; + + *value = soute->fc_weight; + return 0; +} + static int sctp_sched_fc_set(struct sctp_stream *stream, __u16 sid, __u16 weight, gfp_t gfp) { @@ -50,6 +71,7 @@ static int sctp_sched_fc_init_sid(struct sctp_stream *stream, __u16 sid, INIT_LIST_HEAD(&soute->fc_list); soute->fc_length = 0; + soute->fc_weight = 1; return 0; } @@ -67,7 +89,8 @@ static void sctp_sched_fc_sched(struct sctp_stream *stream, return; list_for_each_entry(pos, &stream->fc_list, fc_list) - if (pos->fc_length >= soute->fc_length) + if ((__u64)pos->fc_length * soute->fc_weight >= + (__u64)soute->fc_length * pos->fc_weight) break; list_add_tail(&soute->fc_list, &pos->fc_list); } @@ -137,7 +160,8 @@ static void sctp_sched_fc_dequeue_done(struct sctp_outq *q, pos = soute; list_for_each_entry_continue(pos, &stream->fc_list, fc_list) - if (pos->fc_length >= soute->fc_length) + if 
((__u64)pos->fc_length * soute->fc_weight >= + (__u64)soute->fc_length * pos->fc_weight) break; list_move_tail(&soute->fc_list, &pos->fc_list); } @@ -181,3 +205,21 @@ void sctp_sched_ops_fc_init(void) { sctp_sched_ops_register(SCTP_SS_FC, &sctp_sched_fc); } + +static struct sctp_sched_ops sctp_sched_wfq = { + .set = sctp_sched_wfq_set, + .get = sctp_sched_wfq_get, + .init = sctp_sched_fc_init, + .init_sid = sctp_sched_fc_init_sid, + .free_sid = sctp_sched_fc_free_sid, + .enqueue = sctp_sched_fc_enqueue, + .dequeue = sctp_sched_fc_dequeue, + .dequeue_done = sctp_sched_fc_dequeue_done, + .sched_all = sctp_sched_fc_sched_all, + .unsched_all = sctp_sched_fc_unsched_all, +}; + +void sctp_sched_ops_wfq_init(void) +{ + sctp_sched_ops_register(SCTP_SS_WFQ, &sctp_sched_wfq); +} -- GitLab From a6865576317f6249f3f83cf4c10ab56e627ee153 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 8 Mar 2023 22:02:44 -0800 Subject: [PATCH 0292/2078] selftests/bpf: Fix flaky fib_lookup test There is a report that fib_lookup test is flaky when running in parallel. A symptom of slowness or delay. An example: Testing IPv6 stale neigh set_lookup_params:PASS:inet_pton(IPV6_IFACE_ADDR) 0 nsec test_fib_lookup:PASS:bpf_prog_test_run_opts 0 nsec test_fib_lookup:FAIL:fib_lookup_ret unexpected fib_lookup_ret: actual 0 != expected 7 test_fib_lookup:FAIL:dmac not match unexpected dmac not match: actual 1 != expected 0 dmac expected 11:11:11:11:11:11 actual 00:00:00:00:00:00 [ Note that the "fib_lookup_ret unexpected fib_lookup_ret actual 0 ..." is reversed in terms of expected and actual value. Fixing in this patch also. ] One possibility is the testing stale neigh entry was marked dead by the gc (in neigh_periodic_work). The default gc_stale_time sysctl is 60s. This patch increases it to 15 mins. It also: - fixes the reversed arg (actual vs expected) in one of the ASSERT_EQ test - removes the nodad command arg when adding v4 neigh entry which currently has a warning. 
Fixes: 168de0233586 ("selftests/bpf: Add bpf_fib_lookup test") Reported-by: Alexei Starovoitov Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230309060244.3242491-1-martin.lau@linux.dev --- tools/testing/selftests/bpf/prog_tests/fib_lookup.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c index 429393caf6122..a1e7121058118 100644 --- a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c @@ -54,11 +54,19 @@ static int setup_netns(void) SYS(fail, "ip link add veth1 type veth peer name veth2"); SYS(fail, "ip link set dev veth1 up"); + err = write_sysctl("/proc/sys/net/ipv4/neigh/veth1/gc_stale_time", "900"); + if (!ASSERT_OK(err, "write_sysctl(net.ipv4.neigh.veth1.gc_stale_time)")) + goto fail; + + err = write_sysctl("/proc/sys/net/ipv6/neigh/veth1/gc_stale_time", "900"); + if (!ASSERT_OK(err, "write_sysctl(net.ipv6.neigh.veth1.gc_stale_time)")) + goto fail; + SYS(fail, "ip addr add %s/64 dev veth1 nodad", IPV6_IFACE_ADDR); SYS(fail, "ip neigh add %s dev veth1 nud failed", IPV6_NUD_FAILED_ADDR); SYS(fail, "ip neigh add %s dev veth1 lladdr %s nud stale", IPV6_NUD_STALE_ADDR, DMAC); - SYS(fail, "ip addr add %s/24 dev veth1 nodad", IPV4_IFACE_ADDR); + SYS(fail, "ip addr add %s/24 dev veth1", IPV4_IFACE_ADDR); SYS(fail, "ip neigh add %s dev veth1 nud failed", IPV4_NUD_FAILED_ADDR); SYS(fail, "ip neigh add %s dev veth1 lladdr %s nud stale", IPV4_NUD_STALE_ADDR, DMAC); @@ -158,7 +166,7 @@ void test_fib_lookup(void) if (!ASSERT_OK(err, "bpf_prog_test_run_opts")) continue; - ASSERT_EQ(tests[i].expected_ret, skel->bss->fib_lookup_ret, + ASSERT_EQ(skel->bss->fib_lookup_ret, tests[i].expected_ret, "fib_lookup_ret"); ret = memcmp(tests[i].dmac, fib_params->dmac, sizeof(tests[i].dmac)); -- GitLab From 5a70f4a63000ba68004fb3c1aaf2f90303dd228f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Wei=C3=9F?= Date: Thu, 9 Mar 2023 14:38:23 +0100 Subject: [PATCH 0293/2078] bpf: Fix a typo for BPF_F_ANY_ALIGNMENT in bpf.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix s/BPF_PROF_LOAD/BPF_PROG_LOAD/ typo in the documentation comment for BPF_F_ANY_ALIGNMENT in bpf.h. Signed-off-by: Michael Weiß Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230309133823.944097-1-michael.weiss@aisec.fraunhofer.de --- include/uapi/linux/bpf.h | 2 +- tools/include/uapi/linux/bpf.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4abddb668a107..d8c534e05b0a9 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1108,7 +1108,7 @@ enum bpf_link_type { */ #define BPF_F_STRICT_ALIGNMENT (1U << 0) -/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROF_LOAD command, the +/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROG_LOAD command, the * verifier will allow any alignment whatsoever. 
On platforms * with strict alignment requirements for loads ands stores (such * as sparc and mips) the verifier validates that all loads and diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 4abddb668a107..d8c534e05b0a9 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1108,7 +1108,7 @@ enum bpf_link_type { */ #define BPF_F_STRICT_ALIGNMENT (1U << 0) -/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROF_LOAD command, the +/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROG_LOAD command, the * verifier will allow any alignment whatsoever. On platforms * with strict alignment requirements for loads ands stores (such * as sparc and mips) the verifier validates that all loads and -- GitLab From 27a36bc3cdd5e0420eea90762d69bea34daf97e1 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 9 Mar 2023 18:32:40 +0100 Subject: [PATCH 0294/2078] selftests/bpf: Use ifname instead of ifindex in XDP compliance test tool Rely on interface name instead of interface index in error messages or logs from XDP compliance test tool. Signed-off-by: Lorenzo Bianconi Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/7dc5a8ff56c252b1a7ae29b059d0b2b1543c8b5d.1678382940.git.lorenzo@kernel.org --- tools/testing/selftests/bpf/xdp_features.c | 44 +++++++++++++--------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/tools/testing/selftests/bpf/xdp_features.c b/tools/testing/selftests/bpf/xdp_features.c index fce12165213b5..b060a0d24e442 100644 --- a/tools/testing/selftests/bpf/xdp_features.c +++ b/tools/testing/selftests/bpf/xdp_features.c @@ -25,6 +25,7 @@ static struct env { bool verbosity; + char ifname[IF_NAMESIZE]; int ifindex; bool is_tester; struct { @@ -179,7 +180,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) env.ifindex = if_nametoindex(arg); if (!env.ifindex) env.ifindex = strtoul(arg, NULL, 0); - if (!env.ifindex) { + if (!env.ifindex || !if_indextoname(env.ifindex, env.ifname)) { fprintf(stderr, "Bad interface index or name (%d): %s\n", errno, strerror(errno)); @@ -205,6 +206,7 @@ static void set_env_default(void) env.feature.drv_feature = NETDEV_XDP_ACT_NDO_XMIT; env.feature.action = -EINVAL; env.ifindex = -ENODEV; + strcpy(env.ifname, "unknown"); make_sockaddr(AF_INET6, "::ffff:127.0.0.1", DUT_CTRL_PORT, &env.dut_ctrl_addr, NULL); make_sockaddr(AF_INET6, "::ffff:127.0.0.1", DUT_ECHO_PORT, @@ -248,15 +250,18 @@ static int dut_run_echo_thread(pthread_t *t, int *sockfd) sockfd = start_reuseport_server(AF_INET6, SOCK_DGRAM, NULL, DUT_ECHO_PORT, 0, 1); if (!sockfd) { - fprintf(stderr, "Failed to create echo socket\n"); + fprintf(stderr, + "Failed creating data UDP socket on device %s\n", + env.ifname); return -errno; } /* start echo channel */ err = pthread_create(t, NULL, dut_echo_thread, sockfd); if (err) { - fprintf(stderr, "Failed creating dut_echo thread: %s\n", - strerror(-err)); + fprintf(stderr, + "Failed creating data UDP thread on device %s: %s\n", + env.ifname, strerror(-err)); free_fds(sockfd, 1); return -EINVAL; } @@ -320,9 +325,8 @@ static int dut_attach_xdp_prog(struct xdp_features *skel, int flags) err = bpf_xdp_attach(env.ifindex, bpf_program__fd(prog), flags, NULL); if (err) - fprintf(stderr, - "Failed to attach XDP program to ifindex %d\n", - env.ifindex); + fprintf(stderr, "Failed attaching XDP program to device %s\n", + env.ifname); return err; } @@ -358,13 +362,16 @@ static int dut_run(struct xdp_features *skel) sockfd = start_reuseport_server(AF_INET6, SOCK_STREAM, 
NULL, DUT_CTRL_PORT, 0, 1); if (!sockfd) { - fprintf(stderr, "Failed to create DUT socket\n"); + fprintf(stderr, + "Failed creating control socket on device %s\n", env.ifname); return -errno; } ctrl_sockfd = accept(*sockfd, (struct sockaddr *)&ctrl_addr, &addrlen); if (ctrl_sockfd < 0) { - fprintf(stderr, "Failed to accept connection on DUT socket\n"); + fprintf(stderr, + "Failed accepting connections on device %s control socket\n", + env.ifname); free_fds(sockfd, 1); return -errno; } @@ -422,8 +429,8 @@ static int dut_run(struct xdp_features *skel) &opts); if (err) { fprintf(stderr, - "Failed to query XDP cap for ifindex %d\n", - env.ifindex); + "Failed querying XDP cap for device %s\n", + env.ifname); goto end_thread; } @@ -540,7 +547,9 @@ static int send_echo_msg(void) sockfd = socket(AF_INET6, SOCK_DGRAM, 0); if (sockfd < 0) { - fprintf(stderr, "Failed to create echo socket\n"); + fprintf(stderr, + "Failed creating data UDP socket on device %s\n", + env.ifname); return -errno; } @@ -596,8 +605,8 @@ static int tester_run(struct xdp_features *skel) err = bpf_xdp_attach(env.ifindex, bpf_program__fd(prog), flags, NULL); if (err) { - fprintf(stderr, "Failed to attach XDP program to ifindex %d\n", - env.ifindex); + fprintf(stderr, "Failed attaching XDP program to device %s\n", + env.ifname); goto out; } @@ -653,7 +662,7 @@ int main(int argc, char **argv) return err; if (env.ifindex < 0) { - fprintf(stderr, "Invalid ifindex\n"); + fprintf(stderr, "Invalid device name %s\n", env.ifname); return -ENODEV; } @@ -684,11 +693,12 @@ int main(int argc, char **argv) if (env.is_tester) { /* Tester */ - fprintf(stdout, "Starting tester on device %d\n", env.ifindex); + fprintf(stdout, "Starting tester service on device %s\n", + env.ifname); err = tester_run(skel); } else { /* DUT */ - fprintf(stdout, "Starting DUT on device %d\n", env.ifindex); + fprintf(stdout, "Starting test on device %s\n", env.ifname); err = dut_run(skel); } -- GitLab From c1cd734c1bb3f4d9db75c51c23306e29d8749783 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 9 Mar 2023 18:32:41 +0100 Subject: [PATCH 0295/2078] selftests/bpf: Improve error logs in XDP compliance test tool Improve some error logs reported in the XDP compliance test tool. 
Signed-off-by: Lorenzo Bianconi Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/212fc5bd214ff706f6ef1acbe7272cf4d803ca9c.1678382940.git.lorenzo@kernel.org --- tools/testing/selftests/bpf/xdp_features.c | 23 +++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/bpf/xdp_features.c b/tools/testing/selftests/bpf/xdp_features.c index b060a0d24e442..b449788fbd39c 100644 --- a/tools/testing/selftests/bpf/xdp_features.c +++ b/tools/testing/selftests/bpf/xdp_features.c @@ -152,20 +152,26 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) case 'D': if (make_sockaddr(AF_INET6, arg, DUT_ECHO_PORT, &env.dut_addr, NULL)) { - fprintf(stderr, "Invalid DUT address: %s\n", arg); + fprintf(stderr, + "Invalid address assigned to the Device Under Test: %s\n", + arg); return ARGP_ERR_UNKNOWN; } break; case 'C': if (make_sockaddr(AF_INET6, arg, DUT_CTRL_PORT, &env.dut_ctrl_addr, NULL)) { - fprintf(stderr, "Invalid DUT CTRL address: %s\n", arg); + fprintf(stderr, + "Invalid address assigned to the Device Under Test: %s\n", + arg); return ARGP_ERR_UNKNOWN; } break; case 'T': if (make_sockaddr(AF_INET6, arg, 0, &env.tester_addr, NULL)) { - fprintf(stderr, "Invalid Tester address: %s\n", arg); + fprintf(stderr, + "Invalid address assigned to the Tester device: %s\n", + arg); return ARGP_ERR_UNKNOWN; } break; @@ -454,7 +460,8 @@ static int dut_run(struct xdp_features *skel) &key, sizeof(key), &val, sizeof(val), 0); if (err) { - fprintf(stderr, "bpf_map_lookup_elem failed\n"); + fprintf(stderr, + "bpf_map_lookup_elem failed (%d)\n", err); goto end_thread; } @@ -496,7 +503,7 @@ static bool tester_collect_detected_cap(struct xdp_features *skel, err = bpf_map__lookup_elem(skel->maps.stats, &key, sizeof(key), &val, sizeof(val), 0); if (err) { - fprintf(stderr, "bpf_map_lookup_elem failed\n"); + fprintf(stderr, "bpf_map_lookup_elem failed (%d)\n", err); return false; } @@ -574,7 +581,8 @@ static int tester_run(struct xdp_features *skel) sockfd = socket(AF_INET6, SOCK_STREAM, 0); if (sockfd < 0) { - fprintf(stderr, "Failed to create tester socket\n"); + fprintf(stderr, + "Failed creating tester service control socket\n"); return -errno; } @@ -584,7 +592,8 @@ static int tester_run(struct xdp_features *skel) err = connect(sockfd, (struct sockaddr *)&env.dut_ctrl_addr, sizeof(env.dut_ctrl_addr)); if (err) { - fprintf(stderr, "Failed to connect to the DUT\n"); + fprintf(stderr, + "Failed connecting to the Device Under Test control socket\n"); return -errno; } -- GitLab From a4ba62906db80246b24408e5c8a9e9d78b647b00 Mon Sep 17 00:00:00 2001 From: Tirthendu Sarkar Date: Fri, 10 Mar 2023 00:26:08 +0530 Subject: [PATCH 0296/2078] i40e: consolidate maximum frame size calculation for vsi Introduce new helper function to calculate max frame size for validating and setting of vsi frame size. This is used while configuring vsi, changing the MTU and attaching an XDP program to the vsi. This is in preparation of the legacy rx and multi-buffer changes to be introduced in later patches. 
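For a rough sense of the arithmetic (assuming the driver's usual constants of I40E_MAX_CHAINED_RX_BUFFERS == 5 and I40E_MAX_RXBUFFER == 9728, which this patch does not change): with an XDP program attached chain_len is 1, so a 3072-byte buffer bounds the frame at 3072 bytes; without XDP, 5 * 3072 = 15360 is clamped back down to the 9728-byte limit.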
Signed-off-by: Tirthendu Sarkar Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 71 +++++++++++---------- 1 file changed, 38 insertions(+), 33 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 467001db5070e..d375d7940308c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -2896,15 +2896,35 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf) } /** - * i40e_max_xdp_frame_size - returns the maximum allowed frame size for XDP + * i40e_calculate_vsi_rx_buf_len - Calculates buffer length + * + * @vsi: VSI to calculate rx_buf_len from + */ +static u16 i40e_calculate_vsi_rx_buf_len(struct i40e_vsi *vsi) +{ + if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX)) + return I40E_RXBUFFER_2048; + + return PAGE_SIZE < 8192 ? I40E_RXBUFFER_3072 : I40E_RXBUFFER_2048; +} + +/** + * i40e_max_vsi_frame_size - returns the maximum allowed frame size for VSI * @vsi: the vsi + * @xdp_prog: XDP program **/ -static int i40e_max_xdp_frame_size(struct i40e_vsi *vsi) +static int i40e_max_vsi_frame_size(struct i40e_vsi *vsi, + struct bpf_prog *xdp_prog) { - if (PAGE_SIZE >= 8192 || (vsi->back->flags & I40E_FLAG_LEGACY_RX)) - return I40E_RXBUFFER_2048; + u16 rx_buf_len = i40e_calculate_vsi_rx_buf_len(vsi); + u16 chain_len; + + if (xdp_prog) + chain_len = 1; else - return I40E_RXBUFFER_3072; + chain_len = I40E_MAX_CHAINED_RX_BUFFERS; + + return min_t(u16, rx_buf_len * chain_len, I40E_MAX_RXBUFFER); } /** @@ -2919,12 +2939,13 @@ static int i40e_change_mtu(struct net_device *netdev, int new_mtu) struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; + int frame_size; - if (i40e_enabled_xdp_vsi(vsi)) { - int frame_size = new_mtu + I40E_PACKET_HDR_PAD; - - if (frame_size > i40e_max_xdp_frame_size(vsi)) - return -EINVAL; + frame_size = i40e_max_vsi_frame_size(vsi, vsi->xdp_prog); + if (new_mtu > frame_size - I40E_PACKET_HDR_PAD) { + netdev_err(netdev, "Error changing mtu to %d, Max is %d\n", + new_mtu, frame_size - I40E_PACKET_HDR_PAD); + return -EINVAL; } netdev_dbg(netdev, "changing MTU from %d to %d\n", @@ -3693,24 +3714,6 @@ static int i40e_vsi_configure_tx(struct i40e_vsi *vsi) return err; } -/** - * i40e_calculate_vsi_rx_buf_len - Calculates buffer length - * - * @vsi: VSI to calculate rx_buf_len from - */ -static u16 i40e_calculate_vsi_rx_buf_len(struct i40e_vsi *vsi) -{ - if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX)) - return I40E_RXBUFFER_2048; - -#if (PAGE_SIZE < 8192) - if (!I40E_2K_TOO_SMALL_WITH_PADDING && vsi->netdev->mtu <= ETH_DATA_LEN) - return I40E_RXBUFFER_1536 - NET_IP_ALIGN; -#endif - - return PAGE_SIZE < 8192 ? 
I40E_RXBUFFER_3072 : I40E_RXBUFFER_2048; -} - /** * i40e_vsi_configure_rx - Configure the VSI for Rx * @vsi: the VSI being configured @@ -3722,13 +3725,15 @@ static int i40e_vsi_configure_rx(struct i40e_vsi *vsi) int err = 0; u16 i; - vsi->max_frame = I40E_MAX_RXBUFFER; + vsi->max_frame = i40e_max_vsi_frame_size(vsi, vsi->xdp_prog); vsi->rx_buf_len = i40e_calculate_vsi_rx_buf_len(vsi); #if (PAGE_SIZE < 8192) if (vsi->netdev && !I40E_2K_TOO_SMALL_WITH_PADDING && - vsi->netdev->mtu <= ETH_DATA_LEN) - vsi->max_frame = I40E_RXBUFFER_1536 - NET_IP_ALIGN; + vsi->netdev->mtu <= ETH_DATA_LEN) { + vsi->rx_buf_len = I40E_RXBUFFER_1536 - NET_IP_ALIGN; + vsi->max_frame = vsi->rx_buf_len; + } #endif /* set up individual rings */ @@ -13316,14 +13321,14 @@ out_err: static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog, struct netlink_ext_ack *extack) { - int frame_size = vsi->netdev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; + int frame_size = i40e_max_vsi_frame_size(vsi, prog); struct i40e_pf *pf = vsi->back; struct bpf_prog *old_prog; bool need_reset; int i; /* Don't allow frames that span over multiple buffers */ - if (frame_size > i40e_calculate_vsi_rx_buf_len(vsi)) { + if (vsi->netdev->mtu > frame_size - I40E_PACKET_HDR_PAD) { NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP"); return -EINVAL; } -- GitLab From f7f732a7196d2e876639b1af453b30a6a5fe4a90 Mon Sep 17 00:00:00 2001 From: Tirthendu Sarkar Date: Fri, 10 Mar 2023 00:26:09 +0530 Subject: [PATCH 0297/2078] i40e: change Rx buffer size for legacy-rx to support XDP multi-buffer Adding support for XDP multi-buffer entails adding information of all the fragments of the packet in the xdp_buff. This approach implies that underlying buffer has to provide tailroom for skb_shared_info. In the legacy-rx mode, driver can only configure up to 2k sized Rx buffers and with the current configuration of 2k sized Rx buffers there is no way to do tailroom reservation for skb_shared_info. Hence size of Rx buffers is now lowered to 2048 - sizeof(skb_shared_info). Also, driver can only chain up to 5 Rx buffers and this means max MTU supported for legacy-rx is now 8614 (5 * rx_buffer_len - ETH header with VLAN). 
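As a cross-check of the 8614 figure (assuming sizeof(struct skb_shared_info) is 320 bytes on a typical 64-bit build, and the driver's 26-byte I40E_PACKET_HDR_PAD covering the Ethernet header, FCS and two VLAN tags): SKB_WITH_OVERHEAD(2048) = 2048 - 320 = 1728 bytes of usable buffer, five chained buffers give 5 * 1728 = 8640 bytes, and 8640 - 26 = 8614 bytes of MTU.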
Signed-off-by: Tirthendu Sarkar Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 7 +++++++ drivers/net/ethernet/intel/i40e/i40e_main.c | 12 +++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 4934ff58332c3..afc4fa8c66af4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -5402,6 +5402,13 @@ flags_complete: return -EOPNOTSUPP; } + if ((changed_flags & I40E_FLAG_LEGACY_RX) && + I40E_2K_TOO_SMALL_WITH_PADDING) { + dev_warn(&pf->pdev->dev, + "2k Rx buffer is too small to fit standard MTU and skb_shared_info\n"); + return -EOPNOTSUPP; + } + if ((changed_flags & new_flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED) && (new_flags & I40E_FLAG_MFP_ENABLED)) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index d375d7940308c..e8cf5644bf102 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -2903,7 +2903,7 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf) static u16 i40e_calculate_vsi_rx_buf_len(struct i40e_vsi *vsi) { if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX)) - return I40E_RXBUFFER_2048; + return SKB_WITH_OVERHEAD(I40E_RXBUFFER_2048); return PAGE_SIZE < 8192 ? I40E_RXBUFFER_3072 : I40E_RXBUFFER_2048; } @@ -3661,10 +3661,16 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) } /* configure Rx buffer alignment */ - if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX)) + if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX)) { + if (I40E_2K_TOO_SMALL_WITH_PADDING) { + dev_info(&vsi->back->pdev->dev, + "2k Rx buffer is too small to fit standard MTU and skb_shared_info\n"); + return -EOPNOTSUPP; + } clear_ring_build_skb_enabled(ring); - else + } else { set_ring_build_skb_enabled(ring); + } ring->rx_offset = i40e_rx_offset(ring); -- GitLab From e2843f037127c3a9db26718aaa293d2a8e5881e4 Mon Sep 17 00:00:00 2001 From: Tirthendu Sarkar Date: Fri, 10 Mar 2023 00:26:10 +0530 Subject: [PATCH 0298/2078] i40e: add pre-xdp page_count in rx_buffer Page count of rx_buffer needs to be stored prior to XDP call to prevent page recycling in case that buffer would be freed within xdp redirect path. Instead of storing it on the stack, now it is stored in the rx_buffer struct. This will help in processing multi-buffers as the page counts of all rx_buffers (of the same packet) don't need to be stored on stack. 
Signed-off-by: Tirthendu Sarkar Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 23 +++++++-------------- drivers/net/ethernet/intel/i40e/i40e_txrx.h | 1 + 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 924f972b91faf..a7fba294a8f42 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1970,7 +1970,6 @@ static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb, * i40e_can_reuse_rx_page - Determine if page can be reused for another Rx * @rx_buffer: buffer containing the page * @rx_stats: rx stats structure for the rx ring - * @rx_buffer_pgcnt: buffer page refcount pre xdp_do_redirect() call * * If page is reusable, we have a green light for calling i40e_reuse_rx_page, * which will assign the current buffer to the buffer that next_to_alloc is @@ -1981,8 +1980,7 @@ static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb, * or busy if it could not be reused. */ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer, - struct i40e_rx_queue_stats *rx_stats, - int rx_buffer_pgcnt) + struct i40e_rx_queue_stats *rx_stats) { unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; struct page *page = rx_buffer->page; @@ -1995,7 +1993,7 @@ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer, #if (PAGE_SIZE < 8192) /* if we are only owner of page we can reuse it */ - if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1)) { + if (unlikely((rx_buffer->page_count - pagecnt_bias) > 1)) { rx_stats->page_busy_count++; return false; } @@ -2058,19 +2056,17 @@ static void i40e_add_rx_frag(struct i40e_ring *rx_ring, * i40e_get_rx_buffer - Fetch Rx buffer and synchronize data for use * @rx_ring: rx descriptor ring to transact packets on * @size: size of buffer to add to skb - * @rx_buffer_pgcnt: buffer page refcount * * This function will pull an Rx buffer from the ring and synchronize it * for use by the CPU. */ static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring, - const unsigned int size, - int *rx_buffer_pgcnt) + const unsigned int size) { struct i40e_rx_buffer *rx_buffer; rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean); - *rx_buffer_pgcnt = + rx_buffer->page_count = #if (PAGE_SIZE < 8192) page_count(rx_buffer->page); #else @@ -2226,16 +2222,14 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, * i40e_put_rx_buffer - Clean up used buffer and either recycle or free * @rx_ring: rx descriptor ring to transact packets on * @rx_buffer: rx buffer to pull data from - * @rx_buffer_pgcnt: rx buffer page refcount pre xdp_do_redirect() call * * This function will clean up the contents of the rx_buffer. It will * either recycle the buffer or unmap it and free the associated resources. 
*/ static void i40e_put_rx_buffer(struct i40e_ring *rx_ring, - struct i40e_rx_buffer *rx_buffer, - int rx_buffer_pgcnt) + struct i40e_rx_buffer *rx_buffer) { - if (i40e_can_reuse_rx_page(rx_buffer, &rx_ring->rx_stats, rx_buffer_pgcnt)) { + if (i40e_can_reuse_rx_page(rx_buffer, &rx_ring->rx_stats)) { /* hand second half of page back to the ring */ i40e_reuse_rx_page(rx_ring, rx_buffer); } else { @@ -2457,7 +2451,6 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget, while (likely(total_rx_packets < (unsigned int)budget)) { struct i40e_rx_buffer *rx_buffer; union i40e_rx_desc *rx_desc; - int rx_buffer_pgcnt; unsigned int size; u64 qword; @@ -2500,7 +2493,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget, break; i40e_trace(clean_rx_irq, rx_ring, rx_desc, skb); - rx_buffer = i40e_get_rx_buffer(rx_ring, size, &rx_buffer_pgcnt); + rx_buffer = i40e_get_rx_buffer(rx_ring, size); /* retrieve a buffer from the ring */ if (!skb) { @@ -2541,7 +2534,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget, break; } - i40e_put_rx_buffer(rx_ring, rx_buffer, rx_buffer_pgcnt); + i40e_put_rx_buffer(rx_ring, rx_buffer); cleaned_count++; i40e_inc_ntc(rx_ring); diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 768290dc6f48b..eec4a4a99b9c8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -277,6 +277,7 @@ struct i40e_rx_buffer { struct page *page; __u32 page_offset; __u16 pagecnt_bias; + __u32 page_count; }; struct i40e_queue_stats { -- GitLab From 03e88c8a791c0655ddea07fd4c7cd6cb16087c8f Mon Sep 17 00:00:00 2001 From: Tirthendu Sarkar Date: Fri, 10 Mar 2023 00:26:11 +0530 Subject: [PATCH 0299/2078] i40e: Change size to truesize when using i40e_rx_buffer_flip() Truesize is now passed directly to i40e_rx_buffer_flip() instead of size so that it does not need to recalculate truesize from size using i40e_rx_frame_truesize() before adjusting page offset. With these change the function can now be used during skb building and adding frags. In later patches it will also be easier for adjusting page offsets for multi-buffers. 
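As a reference for the offset arithmetic, a stand-alone sketch (assuming 4K pages and power-of-two half-page buffers) of why flipping by truesize alternates between the two halves of the page, which is what lets callers pass truesize directly:

  #include <stdio.h>

  #define PAGE_SZ  4096u
  #define TRUESIZE (PAGE_SZ / 2)  /* power of two: half of a 4K page */

  int main(void)
  {
      unsigned int page_offset = 0;
      int i;

      /* XOR with a power-of-two truesize toggles 0 <-> 2048, so the
       * buffer bounces between the two halves of the same page */
      for (i = 0; i < 4; i++) {
          printf("use offset %u\n", page_offset);
          page_offset ^= TRUESIZE;
      }
      return 0;
  }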
Signed-off-by: Tirthendu Sarkar Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 54 ++++++++------------- 1 file changed, 19 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index a7fba294a8f42..fa44a2c353b23 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2018,6 +2018,21 @@ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer, return true; } +/** + * i40e_rx_buffer_flip - adjusted rx_buffer to point to an unused region + * @rx_buffer: Rx buffer to adjust + * @truesize: Size of adjustment + **/ +static void i40e_rx_buffer_flip(struct i40e_rx_buffer *rx_buffer, + unsigned int truesize) +{ +#if (PAGE_SIZE < 8192) + rx_buffer->page_offset ^= truesize; +#else + rx_buffer->page_offset += truesize; +#endif +} + /** * i40e_add_rx_frag - Add contents of Rx buffer to sk_buff * @rx_ring: rx descriptor ring to transact packets on @@ -2045,11 +2060,7 @@ static void i40e_add_rx_frag(struct i40e_ring *rx_ring, rx_buffer->page_offset, size, truesize); /* page is being used so we must update the page offset */ -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif + i40e_rx_buffer_flip(rx_buffer, truesize); } /** @@ -2154,11 +2165,7 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, size, truesize); /* buffer is used by skb, update page_offset */ -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif + i40e_rx_buffer_flip(rx_buffer, truesize); } else { /* buffer is unused, reset bias back to rx_buffer */ rx_buffer->pagecnt_bias++; @@ -2209,11 +2216,7 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, skb_metadata_set(skb, metasize); /* buffer is used by skb, update page_offset */ -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif + i40e_rx_buffer_flip(rx_buffer, truesize); return skb; } @@ -2326,25 +2329,6 @@ xdp_out: return result; } -/** - * i40e_rx_buffer_flip - adjusted rx_buffer to point to an unused region - * @rx_ring: Rx ring - * @rx_buffer: Rx buffer to adjust - * @size: Size of adjustment - **/ -static void i40e_rx_buffer_flip(struct i40e_ring *rx_ring, - struct i40e_rx_buffer *rx_buffer, - unsigned int size) -{ - unsigned int truesize = i40e_rx_frame_truesize(rx_ring, size); - -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif -} - /** * i40e_xdp_ring_update_tail - Updates the XDP Tx ring tail register * @xdp_ring: XDP Tx ring @@ -2513,7 +2497,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget, if (xdp_res) { if (xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR)) { xdp_xmit |= xdp_res; - i40e_rx_buffer_flip(rx_ring, rx_buffer, size); + i40e_rx_buffer_flip(rx_buffer, xdp.frame_sz); } else { rx_buffer->pagecnt_bias++; } -- GitLab From 2bc0de9aca3ebdf24674f5a2a7890fde6304f5ca Mon Sep 17 00:00:00 2001 From: Tirthendu Sarkar Date: Fri, 10 Mar 2023 00:26:12 +0530 Subject: [PATCH 0300/2078] i40e: use frame_sz instead of recalculating truesize for building skb In the skb path truesize is calculated while building the skb. This is now avoided and xdp->frame_sz is used instead for both i40e_build_skb() and i40e_construct_skb().
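For reference, a user-space sketch of the truesize shape that now survives only in the large-page path; the constants are illustrative and not the kernel's exact values:

  #include <stdio.h>

  /* local stand-ins for the kernel helpers, 64-byte alignment assumed */
  #define ALIGN_TO(x, a)     (((x) + (a) - 1) & ~((a) - 1))
  #define SKB_DATA_ALIGN_(x) ALIGN_TO((x), 64u)
  #define SHARED_INFO_SZ     320u  /* illustrative sizeof(struct skb_shared_info) */

  int main(void)
  {
      unsigned int size = 1500, rx_offset = 256;

      /* rough shape of i40e_rx_frame_truesize() for PAGE_SIZE >= 8192;
       * on 4K pages this is skipped because frame_sz is a constant half page */
      unsigned int truesize = SKB_DATA_ALIGN_(size + rx_offset) +
                              SKB_DATA_ALIGN_(SHARED_INFO_SZ);

      printf("size %u + offset %u -> truesize %u\n", size, rx_offset, truesize);
      return 0;
  }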
Signed-off-by: Tirthendu Sarkar Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index fa44a2c353b23..e34595ca4fbe4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2113,11 +2113,6 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, struct xdp_buff *xdp) { unsigned int size = xdp->data_end - xdp->data; -#if (PAGE_SIZE < 8192) - unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2; -#else - unsigned int truesize = SKB_DATA_ALIGN(size); -#endif unsigned int headlen; struct sk_buff *skb; @@ -2162,10 +2157,10 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, if (size) { skb_add_rx_frag(skb, 0, rx_buffer->page, rx_buffer->page_offset + headlen, - size, truesize); + size, xdp->frame_sz); /* buffer is used by skb, update page_offset */ - i40e_rx_buffer_flip(rx_buffer, truesize); + i40e_rx_buffer_flip(rx_buffer, xdp->frame_sz); } else { /* buffer is unused, reset bias back to rx_buffer */ rx_buffer->pagecnt_bias++; @@ -2188,13 +2183,6 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, struct xdp_buff *xdp) { unsigned int metasize = xdp->data - xdp->data_meta; -#if (PAGE_SIZE < 8192) - unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2; -#else - unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + - SKB_DATA_ALIGN(xdp->data_end - - xdp->data_hard_start); -#endif struct sk_buff *skb; /* Prefetch first cache line of first page. If xdp->data_meta @@ -2205,7 +2193,7 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, net_prefetch(xdp->data_meta); /* build an skb around the page buffer */ - skb = napi_build_skb(xdp->data_hard_start, truesize); + skb = napi_build_skb(xdp->data_hard_start, xdp->frame_sz); if (unlikely(!skb)) return NULL; @@ -2216,7 +2204,7 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, skb_metadata_set(skb, metasize); /* buffer is used by skb, update page_offset */ - i40e_rx_buffer_flip(rx_buffer, truesize); + i40e_rx_buffer_flip(rx_buffer, xdp->frame_sz); return skb; } -- GitLab From e9031f2da1aef34b0b4c659ead613c335b46ae92 Mon Sep 17 00:00:00 2001 From: Tirthendu Sarkar Date: Fri, 10 Mar 2023 00:26:13 +0530 Subject: [PATCH 0301/2078] i40e: introduce next_to_process to i40e_ring Add a new field called next_to_process in the i40e_ring that is advanced for every buffer and change the semantics of next_to_clean to point to the first buffer of a packet. Driver will use next_to_process in the same way next_to_clean was used previously. For the non multi-buffer case, next_to_process and next_to_clean will always be the same since each packet consists of a single buffer. 
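A toy model (user space, simplified types, not driver code) of the two-index scheme: next_to_process advances on every descriptor, while next_to_clean stays on the packet's first descriptor until the EOP descriptor has been handled:

  #include <stdbool.h>
  #include <stdio.h>

  #define RING_SIZE 8

  struct ring {
      unsigned int next_to_clean;    /* first descriptor of the packet */
      unsigned int next_to_process;  /* descriptor currently being examined */
  };

  static void inc_ntp(struct ring *r)
  {
      r->next_to_process = (r->next_to_process + 1) % RING_SIZE;
  }

  int main(void)
  {
      struct ring r = { 0, 0 };
      bool eop[3] = { false, false, true };  /* a 3-buffer packet */
      int i;

      for (i = 0; i < 3; i++) {
          inc_ntp(&r);
          if (eop[i])  /* only on EOP does the clean index catch up */
              r.next_to_clean = r.next_to_process;
          printf("buffer %d: ntc=%u ntp=%u\n", i, r.next_to_clean, r.next_to_process);
      }
      return 0;
  }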
Signed-off-by: Tirthendu Sarkar Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 26 ++++++++++++--------- drivers/net/ethernet/intel/i40e/i40e_txrx.h | 4 ++++ 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index e34595ca4fbe4..7fa35ff526895 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1524,6 +1524,7 @@ skip_free: rx_ring->next_to_alloc = 0; rx_ring->next_to_clean = 0; + rx_ring->next_to_process = 0; rx_ring->next_to_use = 0; } @@ -1576,6 +1577,7 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring) rx_ring->next_to_alloc = 0; rx_ring->next_to_clean = 0; + rx_ring->next_to_process = 0; rx_ring->next_to_use = 0; /* XDP RX-queue info only needed for RX rings exposed to XDP */ @@ -2076,7 +2078,7 @@ static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring, { struct i40e_rx_buffer *rx_buffer; - rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean); + rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_process); rx_buffer->page_count = #if (PAGE_SIZE < 8192) page_count(rx_buffer->page); @@ -2375,16 +2377,16 @@ void i40e_finalize_xdp_rx(struct i40e_ring *rx_ring, unsigned int xdp_res) } /** - * i40e_inc_ntc: Advance the next_to_clean index + * i40e_inc_ntp: Advance the next_to_process index * @rx_ring: Rx ring **/ -static void i40e_inc_ntc(struct i40e_ring *rx_ring) +static void i40e_inc_ntp(struct i40e_ring *rx_ring) { - u32 ntc = rx_ring->next_to_clean + 1; + u32 ntp = rx_ring->next_to_process + 1; - ntc = (ntc < rx_ring->count) ? ntc : 0; - rx_ring->next_to_clean = ntc; - prefetch(I40E_RX_DESC(rx_ring, ntc)); + ntp = (ntp < rx_ring->count) ? 
ntp : 0; + rx_ring->next_to_process = ntp; + prefetch(I40E_RX_DESC(rx_ring, ntp)); } /** @@ -2421,6 +2423,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget, xdp_prog = READ_ONCE(rx_ring->xdp_prog); while (likely(total_rx_packets < (unsigned int)budget)) { + u16 ntp = rx_ring->next_to_process; struct i40e_rx_buffer *rx_buffer; union i40e_rx_desc *rx_desc; unsigned int size; @@ -2433,7 +2436,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget, cleaned_count = 0; } - rx_desc = I40E_RX_DESC(rx_ring, rx_ring->next_to_clean); + rx_desc = I40E_RX_DESC(rx_ring, ntp); /* status_error_len will always be zero for unused descriptors * because it's cleared in cleanup, and overlaps with hdr_addr @@ -2452,8 +2455,8 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget, i40e_clean_programming_status(rx_ring, rx_desc->raw.qword[0], qword); - rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean); - i40e_inc_ntc(rx_ring); + rx_buffer = i40e_rx_bi(rx_ring, ntp); + i40e_inc_ntp(rx_ring); i40e_reuse_rx_page(rx_ring, rx_buffer); cleaned_count++; continue; @@ -2509,7 +2512,8 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget, i40e_put_rx_buffer(rx_ring, rx_buffer); cleaned_count++; - i40e_inc_ntc(rx_ring); + i40e_inc_ntp(rx_ring); + rx_ring->next_to_clean = rx_ring->next_to_process; if (i40e_is_non_eop(rx_ring, rx_desc)) continue; diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index eec4a4a99b9c8..75888a9ab06d9 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -337,6 +337,10 @@ struct i40e_ring { u8 dcb_tc; /* Traffic class of ring */ u8 __iomem *tail; + /* Next descriptor to be processed; next_to_clean is updated only on + * processing EOP descriptor + */ + u16 next_to_process; /* high bit set means dynamic, use accessor routines to read/write. * hardware only supports 2us resolution for the ITR registers. * these values always store the USER setting, and must be converted -- GitLab From 01aa49e31e1674e22dd9c868ca6b4b945acd621e Mon Sep 17 00:00:00 2001 From: Tirthendu Sarkar Date: Fri, 10 Mar 2023 00:26:14 +0530 Subject: [PATCH 0302/2078] i40e: add xdp_buff to i40e_ring struct Store xdp_buff on Rx ring struct in preparation for XDP multi-buffer support. This will allow us to combine fragmented frames across separate NAPI cycles in the same way as currently skb fragments are handled. This means that skb pointer on Rx ring will become redundant and will be removed in a later patch. As a consequence i40e_trace() now uses xdp instead of skb pointer. Truesize only needs to be calculated for page sizes bigger than 4k as it is always half-page for 4k pages. With xdp_buff on ring, frame size can now be set during xdp_init_buff() and need not be repopulated in each NAPI call for 4k pages. As a consequence i40e_rx_frame_truesize() is now used only for bigger pages. 
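A minimal sketch (user space, hypothetical structs, not the driver's types) of what moving the buff onto the ring buys: packet state built in one poll call survives into the next call instead of dying with the stack frame:

  #include <stdio.h>

  /* stand-in for the xdp_buff state that now lives on the ring */
  struct xdp_state {
      void *data;          /* NULL means "no packet in progress" */
      unsigned int frags;
  };

  struct ring {
      struct xdp_state xdp;  /* persists across poll calls */
  };

  /* one NAPI-style poll: may run out of budget mid-packet and resume later */
  static void napi_poll(struct ring *r, unsigned int frags_seen, int eop)
  {
      if (!r->xdp.data)
          r->xdp.data = (void *)0x1;  /* start building a new packet */
      r->xdp.frags += frags_seen;

      if (eop) {
          printf("flush packet with %u frags\n", r->xdp.frags);
          r->xdp.data = NULL;
          r->xdp.frags = 0;
      } else {
          printf("budget hit, %u frags kept for the next poll\n", r->xdp.frags);
      }
  }

  int main(void)
  {
      struct ring r = { { NULL, 0 } };

      napi_poll(&r, 2, 0);  /* EOP not reached before the budget ran out */
      napi_poll(&r, 1, 1);  /* remaining frag arrives, packet completed */
      return 0;
  }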
Signed-off-by: Tirthendu Sarkar Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 ++ drivers/net/ethernet/intel/i40e/i40e_trace.h | 20 ++++++------ drivers/net/ethernet/intel/i40e/i40e_txrx.c | 33 ++++++++------------ drivers/net/ethernet/intel/i40e/i40e_txrx.h | 7 +++++ 4 files changed, 32 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index e8cf5644bf102..f2c151a01e77d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -3616,6 +3616,8 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) } } + xdp_init_buff(&ring->xdp, i40e_rx_pg_size(ring) / 2, &ring->xdp_rxq); + rx_ctx.dbuff = DIV_ROUND_UP(ring->rx_buf_len, BIT_ULL(I40E_RXQ_CTX_DBUFF_SHIFT)); diff --git a/drivers/net/ethernet/intel/i40e/i40e_trace.h b/drivers/net/ethernet/intel/i40e/i40e_trace.h index 79d587ad54098..33b4e30f5e004 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_trace.h +++ b/drivers/net/ethernet/intel/i40e/i40e_trace.h @@ -162,45 +162,45 @@ DECLARE_EVENT_CLASS( TP_PROTO(struct i40e_ring *ring, union i40e_16byte_rx_desc *desc, - struct sk_buff *skb), + struct xdp_buff *xdp), - TP_ARGS(ring, desc, skb), + TP_ARGS(ring, desc, xdp), TP_STRUCT__entry( __field(void*, ring) __field(void*, desc) - __field(void*, skb) + __field(void*, xdp) __string(devname, ring->netdev->name) ), TP_fast_assign( __entry->ring = ring; __entry->desc = desc; - __entry->skb = skb; + __entry->xdp = xdp; __assign_str(devname, ring->netdev->name); ), TP_printk( - "netdev: %s ring: %p desc: %p skb %p", + "netdev: %s ring: %p desc: %p xdp %p", __get_str(devname), __entry->ring, - __entry->desc, __entry->skb) + __entry->desc, __entry->xdp) ); DEFINE_EVENT( i40e_rx_template, i40e_clean_rx_irq, TP_PROTO(struct i40e_ring *ring, union i40e_16byte_rx_desc *desc, - struct sk_buff *skb), + struct xdp_buff *xdp), - TP_ARGS(ring, desc, skb)); + TP_ARGS(ring, desc, xdp)); DEFINE_EVENT( i40e_rx_template, i40e_clean_rx_irq_rx, TP_PROTO(struct i40e_ring *ring, union i40e_16byte_rx_desc *desc, - struct sk_buff *skb), + struct xdp_buff *xdp), - TP_ARGS(ring, desc, skb)); + TP_ARGS(ring, desc, xdp)); DECLARE_EVENT_CLASS( i40e_xmit_template, diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 7fa35ff526895..5544c2d43a929 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1619,21 +1619,19 @@ void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val) writel(val, rx_ring->tail); } +#if (PAGE_SIZE >= 8192) static unsigned int i40e_rx_frame_truesize(struct i40e_ring *rx_ring, unsigned int size) { unsigned int truesize; -#if (PAGE_SIZE < 8192) - truesize = i40e_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */ -#else truesize = rx_ring->rx_offset ? 
SKB_DATA_ALIGN(size + rx_ring->rx_offset) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) : SKB_DATA_ALIGN(size); -#endif return truesize; } +#endif /** * i40e_alloc_mapped_page - recycle or make a new page @@ -2405,21 +2403,16 @@ static void i40e_inc_ntp(struct i40e_ring *rx_ring) static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget, unsigned int *rx_cleaned) { - unsigned int total_rx_bytes = 0, total_rx_packets = 0, frame_sz = 0; + unsigned int total_rx_bytes = 0, total_rx_packets = 0; u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); unsigned int offset = rx_ring->rx_offset; + struct xdp_buff *xdp = &rx_ring->xdp; struct sk_buff *skb = rx_ring->skb; unsigned int xdp_xmit = 0; struct bpf_prog *xdp_prog; bool failure = false; - struct xdp_buff xdp; int xdp_res = 0; -#if (PAGE_SIZE < 8192) - frame_sz = i40e_rx_frame_truesize(rx_ring, 0); -#endif - xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq); - xdp_prog = READ_ONCE(rx_ring->xdp_prog); while (likely(total_rx_packets < (unsigned int)budget)) { @@ -2467,7 +2460,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget, if (!size) break; - i40e_trace(clean_rx_irq, rx_ring, rx_desc, skb); + i40e_trace(clean_rx_irq, rx_ring, rx_desc, xdp); rx_buffer = i40e_get_rx_buffer(rx_ring, size); /* retrieve a buffer from the ring */ @@ -2476,19 +2469,19 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget, hard_start = page_address(rx_buffer->page) + rx_buffer->page_offset - offset; - xdp_prepare_buff(&xdp, hard_start, offset, size, true); - xdp_buff_clear_frags_flag(&xdp); + xdp_prepare_buff(xdp, hard_start, offset, size, true); + xdp_buff_clear_frags_flag(xdp); #if (PAGE_SIZE > 4096) /* At larger PAGE_SIZE, frame_sz depend on len size */ - xdp.frame_sz = i40e_rx_frame_truesize(rx_ring, size); + xdp->frame_sz = i40e_rx_frame_truesize(rx_ring, size); #endif - xdp_res = i40e_run_xdp(rx_ring, &xdp, xdp_prog); + xdp_res = i40e_run_xdp(rx_ring, xdp, xdp_prog); } if (xdp_res) { if (xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR)) { xdp_xmit |= xdp_res; - i40e_rx_buffer_flip(rx_buffer, xdp.frame_sz); + i40e_rx_buffer_flip(rx_buffer, xdp->frame_sz); } else { rx_buffer->pagecnt_bias++; } @@ -2497,9 +2490,9 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget, } else if (skb) { i40e_add_rx_frag(rx_ring, rx_buffer, skb, size); } else if (ring_uses_build_skb(rx_ring)) { - skb = i40e_build_skb(rx_ring, rx_buffer, &xdp); + skb = i40e_build_skb(rx_ring, rx_buffer, xdp); } else { - skb = i40e_construct_skb(rx_ring, rx_buffer, &xdp); + skb = i40e_construct_skb(rx_ring, rx_buffer, xdp); } /* exit if we failed to retrieve a buffer */ @@ -2528,7 +2521,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget, /* populate checksum, VLAN, and protocol */ i40e_process_skb_fields(rx_ring, rx_desc, skb); - i40e_trace(clean_rx_irq_rx, rx_ring, rx_desc, skb); + i40e_trace(clean_rx_irq_rx, rx_ring, rx_desc, xdp); napi_gro_receive(&rx_ring->q_vector->napi, skb); skb = NULL; diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 75888a9ab06d9..55da9fde9d170 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -337,6 +337,13 @@ struct i40e_ring { u8 dcb_tc; /* Traffic class of ring */ u8 __iomem *tail; + /* Storing xdp_buff on ring helps in saving the state of partially built + * packet when i40e_clean_rx_ring_irq() must return before it sees EOP + * and to resume packet building for this ring in the next 
call to + * i40e_clean_rx_ring_irq(). + */ + struct xdp_buff xdp; + /* Next descriptor to be processed; next_to_clean is updated only on * processing EOP descriptor */ -- GitLab From e213ced19befc09d6d6913799053b67896596cd1 Mon Sep 17 00:00:00 2001 From: Tirthendu Sarkar Date: Fri, 10 Mar 2023 00:26:15 +0530 Subject: [PATCH 0303/2078] i40e: add support for XDP multi-buffer Rx This patch adds multi-buffer support for the i40e_driver. i40e_clean_rx_irq() is modified to collate all the buffers of a packet before calling the XDP program. xdp_buff is built for the first frag of the packet and subsequent frags are added to it. 'next_to_process' is incremented for all non-EOP frags while 'next_to_clean' stays at the first descriptor of the packet. XDP program is called only on receiving EOP frag. New functions are added for adding frags to xdp_buff and for post processing of the buffers once the xdp prog has run. For XDP_PASS this results in a skb with multiple fragments. i40e_build_skb() builds the skb around xdp buffer that already contains frags data. So i40e_add_rx_frag() helper function is now removed. Since fields before 'dataref' in skb_shared_info are cleared during napi_skb_build(), xdp_update_skb_shared_info() is called to set those. For i40e_construct_skb(), all the frags data needs to be copied from xdp_buffer's shared_skb_info to newly constructed skb's shared_skb_info. This also means 'skb' does not need to be preserved across i40e_napi_poll() calls and hence is removed from i40e_ring structure. Previously i40e_alloc_rx_buffers() was called for every 32 cleaned buffers. For multi-buffers this may not be optimal as there may be more cleaned buffers in each i40e_clean_rx_irq() call. So this is now called when at least half of the ring size has been cleaned. Signed-off-by: Tirthendu Sarkar Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 7 +- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 314 +++++++++++++------- drivers/net/ethernet/intel/i40e/i40e_txrx.h | 8 - 3 files changed, 211 insertions(+), 118 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index f2c151a01e77d..053be338e4ba3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -2919,7 +2919,7 @@ static int i40e_max_vsi_frame_size(struct i40e_vsi *vsi, u16 rx_buf_len = i40e_calculate_vsi_rx_buf_len(vsi); u16 chain_len; - if (xdp_prog) + if (xdp_prog && !xdp_prog->aux->xdp_has_frags) chain_len = 1; else chain_len = I40E_MAX_CHAINED_RX_BUFFERS; @@ -13337,7 +13337,7 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog, /* Don't allow frames that span over multiple buffers */ if (vsi->netdev->mtu > frame_size - I40E_PACKET_HDR_PAD) { - NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP"); + NL_SET_ERR_MSG_MOD(extack, "MTU too large for linear frames and XDP prog does not support frags"); return -EINVAL; } @@ -13823,7 +13823,8 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | - NETDEV_XDP_ACT_XSK_ZEROCOPY; + NETDEV_XDP_ACT_XSK_ZEROCOPY | + NETDEV_XDP_ACT_RX_SG; } else { /* Relate the VSI_VMDQ name to the VSI_MAIN name. 
Note that we * are still limited by IFNAMSIZ, but we're adding 'v%d\0' to diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 5544c2d43a929..32cce90abbb43 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1477,9 +1477,6 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring) if (!rx_ring->rx_bi) return; - dev_kfree_skb(rx_ring->skb); - rx_ring->skb = NULL; - if (rx_ring->xsk_pool) { i40e_xsk_clean_rx_ring(rx_ring); goto skip_free; @@ -2033,36 +2030,6 @@ static void i40e_rx_buffer_flip(struct i40e_rx_buffer *rx_buffer, #endif } -/** - * i40e_add_rx_frag - Add contents of Rx buffer to sk_buff - * @rx_ring: rx descriptor ring to transact packets on - * @rx_buffer: buffer containing page to add - * @skb: sk_buff to place the data into - * @size: packet length from rx_desc - * - * This function will add the data contained in rx_buffer->page to the skb. - * It will just attach the page as a frag to the skb. - * - * The function will then update the page offset. - **/ -static void i40e_add_rx_frag(struct i40e_ring *rx_ring, - struct i40e_rx_buffer *rx_buffer, - struct sk_buff *skb, - unsigned int size) -{ -#if (PAGE_SIZE < 8192) - unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2; -#else - unsigned int truesize = SKB_DATA_ALIGN(size + rx_ring->rx_offset); -#endif - - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, - rx_buffer->page_offset, size, truesize); - - /* page is being used so we must update the page offset */ - i40e_rx_buffer_flip(rx_buffer, truesize); -} - /** * i40e_get_rx_buffer - Fetch Rx buffer and synchronize data for use * @rx_ring: rx descriptor ring to transact packets on @@ -2099,20 +2066,82 @@ static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring, } /** - * i40e_construct_skb - Allocate skb and populate it + * i40e_put_rx_buffer - Clean up used buffer and either recycle or free * @rx_ring: rx descriptor ring to transact packets on * @rx_buffer: rx buffer to pull data from + * + * This function will clean up the contents of the rx_buffer. It will + * either recycle the buffer or unmap it and free the associated resources. 
+ */ +static void i40e_put_rx_buffer(struct i40e_ring *rx_ring, + struct i40e_rx_buffer *rx_buffer) +{ + if (i40e_can_reuse_rx_page(rx_buffer, &rx_ring->rx_stats)) { + /* hand second half of page back to the ring */ + i40e_reuse_rx_page(rx_ring, rx_buffer); + } else { + /* we are not reusing the buffer so unmap it */ + dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, + i40e_rx_pg_size(rx_ring), + DMA_FROM_DEVICE, I40E_RX_DMA_ATTR); + __page_frag_cache_drain(rx_buffer->page, + rx_buffer->pagecnt_bias); + /* clear contents of buffer_info */ + rx_buffer->page = NULL; + } +} + +/** + * i40e_process_rx_buffs- Processing of buffers post XDP prog or on error + * @rx_ring: Rx descriptor ring to transact packets on + * @xdp_res: Result of the XDP program + * @xdp: xdp_buff pointing to the data + **/ +static void i40e_process_rx_buffs(struct i40e_ring *rx_ring, int xdp_res, + struct xdp_buff *xdp) +{ + u32 next = rx_ring->next_to_clean; + struct i40e_rx_buffer *rx_buffer; + + xdp->flags = 0; + + while (1) { + rx_buffer = i40e_rx_bi(rx_ring, next); + if (++next == rx_ring->count) + next = 0; + + if (!rx_buffer->page) + continue; + + if (xdp_res == I40E_XDP_CONSUMED) + rx_buffer->pagecnt_bias++; + else + i40e_rx_buffer_flip(rx_buffer, xdp->frame_sz); + + /* EOP buffer will be put in i40e_clean_rx_irq() */ + if (next == rx_ring->next_to_process) + return; + + i40e_put_rx_buffer(rx_ring, rx_buffer); + } +} + +/** + * i40e_construct_skb - Allocate skb and populate it + * @rx_ring: rx descriptor ring to transact packets on * @xdp: xdp_buff pointing to the data + * @nr_frags: number of buffers for the packet * * This function allocates an skb. It then populates it with the page * data from the current receive descriptor, taking care to set up the * skb correctly. 
*/ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, - struct i40e_rx_buffer *rx_buffer, - struct xdp_buff *xdp) + struct xdp_buff *xdp, + u32 nr_frags) { unsigned int size = xdp->data_end - xdp->data; + struct i40e_rx_buffer *rx_buffer; unsigned int headlen; struct sk_buff *skb; @@ -2152,13 +2181,17 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, memcpy(__skb_put(skb, headlen), xdp->data, ALIGN(headlen, sizeof(long))); + rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean); /* update all of the pointers */ size -= headlen; if (size) { + if (unlikely(nr_frags >= MAX_SKB_FRAGS)) { + dev_kfree_skb(skb); + return NULL; + } skb_add_rx_frag(skb, 0, rx_buffer->page, rx_buffer->page_offset + headlen, size, xdp->frame_sz); - /* buffer is used by skb, update page_offset */ i40e_rx_buffer_flip(rx_buffer, xdp->frame_sz); } else { @@ -2166,21 +2199,40 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, rx_buffer->pagecnt_bias++; } + if (unlikely(xdp_buff_has_frags(xdp))) { + struct skb_shared_info *sinfo, *skinfo = skb_shinfo(skb); + + sinfo = xdp_get_shared_info_from_buff(xdp); + memcpy(&skinfo->frags[skinfo->nr_frags], &sinfo->frags[0], + sizeof(skb_frag_t) * nr_frags); + + xdp_update_skb_shared_info(skb, skinfo->nr_frags + nr_frags, + sinfo->xdp_frags_size, + nr_frags * xdp->frame_sz, + xdp_buff_is_frag_pfmemalloc(xdp)); + + /* First buffer has already been processed, so bump ntc */ + if (++rx_ring->next_to_clean == rx_ring->count) + rx_ring->next_to_clean = 0; + + i40e_process_rx_buffs(rx_ring, I40E_XDP_PASS, xdp); + } + return skb; } /** * i40e_build_skb - Build skb around an existing buffer * @rx_ring: Rx descriptor ring to transact packets on - * @rx_buffer: Rx buffer to pull data from * @xdp: xdp_buff pointing to the data + * @nr_frags: number of buffers for the packet * * This function builds an skb around an existing Rx buffer, taking care * to set up the skb correctly and avoid any memcpy overhead. */ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, - struct i40e_rx_buffer *rx_buffer, - struct xdp_buff *xdp) + struct xdp_buff *xdp, + u32 nr_frags) { unsigned int metasize = xdp->data - xdp->data_meta; struct sk_buff *skb; @@ -2203,36 +2255,25 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, if (metasize) skb_metadata_set(skb, metasize); - /* buffer is used by skb, update page_offset */ - i40e_rx_buffer_flip(rx_buffer, xdp->frame_sz); + if (unlikely(xdp_buff_has_frags(xdp))) { + struct skb_shared_info *sinfo; - return skb; -} + sinfo = xdp_get_shared_info_from_buff(xdp); + xdp_update_skb_shared_info(skb, nr_frags, + sinfo->xdp_frags_size, + nr_frags * xdp->frame_sz, + xdp_buff_is_frag_pfmemalloc(xdp)); -/** - * i40e_put_rx_buffer - Clean up used buffer and either recycle or free - * @rx_ring: rx descriptor ring to transact packets on - * @rx_buffer: rx buffer to pull data from - * - * This function will clean up the contents of the rx_buffer. It will - * either recycle the buffer or unmap it and free the associated resources. 
- */ -static void i40e_put_rx_buffer(struct i40e_ring *rx_ring, - struct i40e_rx_buffer *rx_buffer) -{ - if (i40e_can_reuse_rx_page(rx_buffer, &rx_ring->rx_stats)) { - /* hand second half of page back to the ring */ - i40e_reuse_rx_page(rx_ring, rx_buffer); + i40e_process_rx_buffs(rx_ring, I40E_XDP_PASS, xdp); } else { - /* we are not reusing the buffer so unmap it */ - dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, - i40e_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, I40E_RX_DMA_ATTR); - __page_frag_cache_drain(rx_buffer->page, - rx_buffer->pagecnt_bias); - /* clear contents of buffer_info */ - rx_buffer->page = NULL; + struct i40e_rx_buffer *rx_buffer; + + rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean); + /* buffer is used by skb, update page_offset */ + i40e_rx_buffer_flip(rx_buffer, xdp->frame_sz); } + + return skb; } /** @@ -2387,6 +2428,55 @@ static void i40e_inc_ntp(struct i40e_ring *rx_ring) prefetch(I40E_RX_DESC(rx_ring, ntp)); } +/** + * i40e_add_xdp_frag: Add a frag to xdp_buff + * @xdp: xdp_buff pointing to the data + * @nr_frags: return number of buffers for the packet + * @rx_buffer: rx_buffer holding data of the current frag + * @size: size of data of current frag + */ +static int i40e_add_xdp_frag(struct xdp_buff *xdp, u32 *nr_frags, + struct i40e_rx_buffer *rx_buffer, u32 size) +{ + struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); + + if (!xdp_buff_has_frags(xdp)) { + sinfo->nr_frags = 0; + sinfo->xdp_frags_size = 0; + xdp_buff_set_frags_flag(xdp); + } else if (unlikely(sinfo->nr_frags >= MAX_SKB_FRAGS)) { + /* Overflowing packet: All frags need to be dropped */ + return -ENOMEM; + } + + __skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buffer->page, + rx_buffer->page_offset, size); + + sinfo->xdp_frags_size += size; + + if (page_is_pfmemalloc(rx_buffer->page)) + xdp_buff_set_frag_pfmemalloc(xdp); + *nr_frags = sinfo->nr_frags; + + return 0; +} + +/** + * i40e_consume_xdp_buff - Consume all the buffers of the packet and update ntc + * @rx_ring: rx descriptor ring to transact packets on + * @xdp: xdp_buff pointing to the data + * @rx_buffer: rx_buffer of eop desc + */ +static void i40e_consume_xdp_buff(struct i40e_ring *rx_ring, + struct xdp_buff *xdp, + struct i40e_rx_buffer *rx_buffer) +{ + i40e_process_rx_buffs(rx_ring, I40E_XDP_CONSUMED, xdp); + i40e_put_rx_buffer(rx_ring, rx_buffer); + rx_ring->next_to_clean = rx_ring->next_to_process; + xdp->data = NULL; +} + /** * i40e_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf * @rx_ring: rx descriptor ring to transact packets on @@ -2405,9 +2495,9 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget, { unsigned int total_rx_bytes = 0, total_rx_packets = 0; u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); + u16 clean_threshold = rx_ring->count / 2; unsigned int offset = rx_ring->rx_offset; struct xdp_buff *xdp = &rx_ring->xdp; - struct sk_buff *skb = rx_ring->skb; unsigned int xdp_xmit = 0; struct bpf_prog *xdp_prog; bool failure = false; @@ -2419,11 +2509,14 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget, u16 ntp = rx_ring->next_to_process; struct i40e_rx_buffer *rx_buffer; union i40e_rx_desc *rx_desc; + struct sk_buff *skb; unsigned int size; + u32 nfrags = 0; + bool neop; u64 qword; /* return some buffers to hardware, one at a time is too slow */ - if (cleaned_count >= I40E_RX_BUFFER_WRITE) { + if (cleaned_count >= clean_threshold) { failure = failure || i40e_alloc_rx_buffers(rx_ring, cleaned_count); cleaned_count = 0; @@ -2461,76 
+2554,83 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget, break; i40e_trace(clean_rx_irq, rx_ring, rx_desc, xdp); + /* retrieve a buffer from the ring */ rx_buffer = i40e_get_rx_buffer(rx_ring, size); - /* retrieve a buffer from the ring */ - if (!skb) { + neop = i40e_is_non_eop(rx_ring, rx_desc); + i40e_inc_ntp(rx_ring); + + if (!xdp->data) { unsigned char *hard_start; hard_start = page_address(rx_buffer->page) + rx_buffer->page_offset - offset; xdp_prepare_buff(xdp, hard_start, offset, size, true); - xdp_buff_clear_frags_flag(xdp); #if (PAGE_SIZE > 4096) /* At larger PAGE_SIZE, frame_sz depend on len size */ xdp->frame_sz = i40e_rx_frame_truesize(rx_ring, size); #endif - xdp_res = i40e_run_xdp(rx_ring, xdp, xdp_prog); + } else if (i40e_add_xdp_frag(xdp, &nfrags, rx_buffer, size) && + !neop) { + /* Overflowing packet: Drop all frags on EOP */ + i40e_consume_xdp_buff(rx_ring, xdp, rx_buffer); + break; } + if (neop) + continue; + + xdp_res = i40e_run_xdp(rx_ring, xdp, xdp_prog); + if (xdp_res) { - if (xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR)) { - xdp_xmit |= xdp_res; + xdp_xmit |= xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR); + + if (unlikely(xdp_buff_has_frags(xdp))) { + i40e_process_rx_buffs(rx_ring, xdp_res, xdp); + size = xdp_get_buff_len(xdp); + } else if (xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR)) { i40e_rx_buffer_flip(rx_buffer, xdp->frame_sz); } else { rx_buffer->pagecnt_bias++; } total_rx_bytes += size; - total_rx_packets++; - } else if (skb) { - i40e_add_rx_frag(rx_ring, rx_buffer, skb, size); - } else if (ring_uses_build_skb(rx_ring)) { - skb = i40e_build_skb(rx_ring, rx_buffer, xdp); } else { - skb = i40e_construct_skb(rx_ring, rx_buffer, xdp); - } + if (ring_uses_build_skb(rx_ring)) + skb = i40e_build_skb(rx_ring, xdp, nfrags); + else + skb = i40e_construct_skb(rx_ring, xdp, nfrags); + + /* drop if we failed to retrieve a buffer */ + if (!skb) { + rx_ring->rx_stats.alloc_buff_failed++; + i40e_consume_xdp_buff(rx_ring, xdp, rx_buffer); + break; + } - /* exit if we failed to retrieve a buffer */ - if (!xdp_res && !skb) { - rx_ring->rx_stats.alloc_buff_failed++; - rx_buffer->pagecnt_bias++; - break; - } + if (i40e_cleanup_headers(rx_ring, skb, rx_desc)) + goto process_next; - i40e_put_rx_buffer(rx_ring, rx_buffer); - cleaned_count++; + /* probably a little skewed due to removing CRC */ + total_rx_bytes += skb->len; - i40e_inc_ntp(rx_ring); - rx_ring->next_to_clean = rx_ring->next_to_process; - if (i40e_is_non_eop(rx_ring, rx_desc)) - continue; + /* populate checksum, VLAN, and protocol */ + i40e_process_skb_fields(rx_ring, rx_desc, skb); - if (xdp_res || i40e_cleanup_headers(rx_ring, skb, rx_desc)) { - skb = NULL; - continue; + i40e_trace(clean_rx_irq_rx, rx_ring, rx_desc, xdp); + napi_gro_receive(&rx_ring->q_vector->napi, skb); } - /* probably a little skewed due to removing CRC */ - total_rx_bytes += skb->len; - - /* populate checksum, VLAN, and protocol */ - i40e_process_skb_fields(rx_ring, rx_desc, skb); - - i40e_trace(clean_rx_irq_rx, rx_ring, rx_desc, xdp); - napi_gro_receive(&rx_ring->q_vector->napi, skb); - skb = NULL; - /* update budget accounting */ total_rx_packets++; +process_next: + cleaned_count += nfrags + 1; + i40e_put_rx_buffer(rx_ring, rx_buffer); + rx_ring->next_to_clean = rx_ring->next_to_process; + + xdp->data = NULL; } i40e_finalize_xdp_rx(rx_ring, xdp_xmit); - rx_ring->skb = skb; i40e_update_rx_stats(rx_ring, total_rx_bytes, total_rx_packets); diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h 
b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 55da9fde9d170..8c3d24012c54c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -392,14 +392,6 @@ struct i40e_ring { struct rcu_head rcu; /* to avoid race on free */ u16 next_to_alloc; - struct sk_buff *skb; /* When i40e_clean_rx_ring_irq() must - * return before it sees the EOP for - * the current packet, we save that skb - * here and resume receiving this - * packet the next time - * i40e_clean_rx_ring_irq() is called - * for this ring. - */ struct i40e_channel *ch; u16 rx_offset; -- GitLab From 63d78b7e8ca2d0eb8c687a355fa19d01b6fcc723 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 9 Mar 2023 17:24:10 -0800 Subject: [PATCH 0304/2078] selftests/bpf: Workaround verification failure for fexit_bpf2bpf/func_replace_return_code With latest llvm17, selftest fexit_bpf2bpf/func_replace_return_code has the following verification failure: 0: R1=ctx(off=0,imm=0) R10=fp0 ; int connect_v4_prog(struct bpf_sock_addr *ctx) 0: (bf) r7 = r1 ; R1=ctx(off=0,imm=0) R7_w=ctx(off=0,imm=0) 1: (b4) w6 = 0 ; R6_w=0 ; memset(&tuple.ipv4.saddr, 0, sizeof(tuple.ipv4.saddr)); ... ; return do_bind(ctx) ? 1 : 0; 179: (bf) r1 = r7 ; R1=ctx(off=0,imm=0) R7=ctx(off=0,imm=0) 180: (85) call pc+147 Func#3 is global and valid. Skipping. 181: R0_w=scalar() 181: (bc) w6 = w0 ; R0_w=scalar() R6_w=scalar(umax=4294967295,var_off=(0x0; 0xffffffff)) 182: (05) goto pc-129 ; } 54: (bc) w0 = w6 ; R0_w=scalar(umax=4294967295,var_off=(0x0; 0xffffffff)) R6_w=scalar(umax=4294967295,var_off=(0x0; 0xffffffff)) 55: (95) exit At program exit the register R0 has value (0x0; 0xffffffff) should have been in (0x0; 0x1) processed 281 insns (limit 1000000) max_states_per_insn 1 total_states 26 peak_states 26 mark_read 13 -- END PROG LOAD LOG -- libbpf: prog 'connect_v4_prog': failed to load: -22 The corresponding source code: __attribute__ ((noinline)) int do_bind(struct bpf_sock_addr *ctx) { struct sockaddr_in sa = {}; sa.sin_family = AF_INET; sa.sin_port = bpf_htons(0); sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4); if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0) return 0; return 1; } ... SEC("cgroup/connect4") int connect_v4_prog(struct bpf_sock_addr *ctx) { ... return do_bind(ctx) ? 1 : 0; } Insn 180 is a call to 'do_bind'. The call's return value is also the return value for the program. Since do_bind() returns 0/1, so it is legitimate for compiler to optimize 'return do_bind(ctx) ? 1 : 0' to 'return do_bind(ctx)'. However, such optimization breaks verifier as the return value of 'do_bind()' is marked as any scalar which violates the requirement of prog return value 0/1. There are two ways to fix this problem, (1) changing 'return 1' in do_bind() to e.g. 'return 10' so the compiler has to do 'do_bind(ctx) ? 1 :0', or (2) suggested by Andrii, marking do_bind() with __weak attribute so the compiler cannot make any assumption on do_bind() return value. This patch adopted adding __weak approach which is simpler and more resistant to potential compiler optimizations. 
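The effect is easy to reproduce outside BPF; a user-space sketch with hypothetical helper names: with noinline alone the compiler may still propagate the callee's 0/1 result range and fold the ternary away, while the weak attribute forces it to treat the return value as unknown because the definition may be replaced at link time:

  #include <stdio.h>

  __attribute__((noinline, weak))
  int do_check(int x)
  {
      return x ? 1 : 0;
  }

  int prog(int x)
  {
      /* without the weak attribute this could legally be folded into a
       * plain "return do_check(x);", which is what broke the verifier's
       * 0/1 return-value check */
      return do_check(x) ? 1 : 0;
  }

  int main(void)
  {
      printf("%d %d\n", prog(0), prog(7));
      return 0;
  }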
Suggested-by: Andrii Nakryiko Signed-off-by: Yonghong Song Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230310012410.2920570-1-yhs@fb.com --- tools/testing/selftests/bpf/progs/connect4_prog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c index ec25371de789d..7ef49ec04838d 100644 --- a/tools/testing/selftests/bpf/progs/connect4_prog.c +++ b/tools/testing/selftests/bpf/progs/connect4_prog.c @@ -32,7 +32,7 @@ #define IFNAMSIZ 16 #endif -__attribute__ ((noinline)) +__attribute__ ((noinline)) __weak int do_bind(struct bpf_sock_addr *ctx) { struct sockaddr_in sa = {}; -- GitLab From 14296c7d72ec26d18cbe200a43c6d5b7d967ab89 Mon Sep 17 00:00:00 2001 From: Nick Alcock Date: Wed, 8 Mar 2023 12:12:30 +0000 Subject: [PATCH 0305/2078] mctp: remove MODULE_LICENSE in non-modules Since commit 8b41fc4454e ("kbuild: create modules.builtin without Makefile.modbuiltin or tristate.conf"), MODULE_LICENSE declarations are used to identify modules. As a consequence, uses of the macro in non-modules will cause modprobe to misidentify their containing object file as a module when it is not (false positives), and modprobe might succeed rather than failing with a suitable error message. So remove it in the files in this commit, none of which can be built as modules. Signed-off-by: Nick Alcock Suggested-by: Luis Chamberlain Cc: Hitomi Hasegawa Cc: Jeremy Kerr Cc: Matt Johnston Link: https://lore.kernel.org/r/20230308121230.5354-2-nick.alcock@oracle.com Signed-off-by: Jakub Kicinski --- net/mctp/af_mctp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index 3150f3f0c8725..bb4bd0b6a4f79 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -704,7 +704,6 @@ subsys_initcall(mctp_init); module_exit(mctp_exit); MODULE_DESCRIPTION("MCTP core"); -MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Jeremy Kerr "); MODULE_ALIAS_NETPROTO(PF_MCTP); -- GitLab From efb5b62d72719fd1df1f927542c58d1e21d69e19 Mon Sep 17 00:00:00 2001 From: Nick Alcock Date: Wed, 8 Mar 2023 12:12:29 +0000 Subject: [PATCH 0306/2078] lib: packing: remove MODULE_LICENSE in non-modules Since commit 8b41fc4454e ("kbuild: create modules.builtin without Makefile.modbuiltin or tristate.conf"), MODULE_LICENSE declarations are used to identify modules. As a consequence, uses of the macro in non-modules will cause modprobe to misidentify their containing object file as a module when it is not (false positives), and modprobe might succeed rather than failing with a suitable error message. So remove it in the files in this commit, none of which can be built as modules. Signed-off-by: Nick Alcock Suggested-by: Luis Chamberlain Cc: Hitomi Hasegawa Cc: Vladimir Oltean Link: https://lore.kernel.org/r/20230308121230.5354-1-nick.alcock@oracle.com Signed-off-by: Jakub Kicinski --- lib/packing.c | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/packing.c b/lib/packing.c index a96169237ae66..3f656167c17e0 100644 --- a/lib/packing.c +++ b/lib/packing.c @@ -198,5 +198,4 @@ int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen, } EXPORT_SYMBOL(packing); -MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("Generic bitfield packing and unpacking"); -- GitLab From 6978052448f9eb19f7b03243ac0416104e5ee50d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 8 Mar 2023 15:20:06 +0100 Subject: [PATCH 0307/2078] netlink: remove unused 'compare' function No users in the tree. 
Tested with allmodconfig build. Signed-off-by: Florian Westphal Link: https://lore.kernel.org/r/20230308142006.20879-1-fw@strlen.de Signed-off-by: Jakub Kicinski --- include/linux/netlink.h | 1 - net/netlink/af_netlink.c | 2 -- net/netlink/af_netlink.h | 1 - 3 files changed, 4 deletions(-) diff --git a/include/linux/netlink.h b/include/linux/netlink.h index c43ac7690eca1..3e87432521675 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -50,7 +50,6 @@ struct netlink_kernel_cfg { struct mutex *cb_mutex; int (*bind)(struct net *net, int group); void (*unbind)(struct net *net, int group); - bool (*compare)(struct net *net, struct sock *sk); }; struct sock *__netlink_kernel_create(struct net *net, int unit, diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index c642776597531..877f1da1a8aca 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2097,8 +2097,6 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module, nl_table[unit].bind = cfg->bind; nl_table[unit].unbind = cfg->unbind; nl_table[unit].flags = cfg->flags; - if (cfg->compare) - nl_table[unit].compare = cfg->compare; } nl_table[unit].registered = 1; } else { diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index 5f454c8de6a4d..90a3198a9b7f7 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -64,7 +64,6 @@ struct netlink_table { struct module *module; int (*bind)(struct net *net, int group); void (*unbind)(struct net *net, int group); - bool (*compare)(struct net *net, struct sock *sock); int registered; }; -- GitLab From 513bdd9473888ba4b1441923c2711cb5ecd06cfd Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 8 Mar 2023 21:19:55 +0100 Subject: [PATCH 0308/2078] net: phy: smsc: use phy_set_bits in smsc_phy_config_init Simplify the code by using phy_set_bits(). Signed-off-by: Heiner Kallweit Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/b64d9f86-d029-b911-bbe9-6ca6889399d7@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/smsc.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index feb6ac3939ac4..ac951d67edcc4 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -99,20 +99,13 @@ static irqreturn_t smsc_phy_handle_interrupt(struct phy_device *phydev) static int smsc_phy_config_init(struct phy_device *phydev) { struct smsc_phy_priv *priv = phydev->priv; - int rc; if (!priv->energy_enable || phydev->irq != PHY_POLL) return 0; - rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS); - - if (rc < 0) - return rc; - - /* Enable energy detect mode for this SMSC Transceivers */ - rc = phy_write(phydev, MII_LAN83C185_CTRL_STATUS, - rc | MII_LAN83C185_EDPWRDOWN); - return rc; + /* Enable energy detect power down mode */ + return phy_set_bits(phydev, MII_LAN83C185_CTRL_STATUS, + MII_LAN83C185_EDPWRDOWN); } static int smsc_phy_reset(struct phy_device *phydev) -- GitLab From 62423bd2d2e231951245d77740a58027a2d81ef9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Mar 2023 18:26:48 +0000 Subject: [PATCH 0309/2078] net: sched: remove qdisc_watchdog->last_expires This field mirrors hrtimer softexpires, we can instead use the existing helpers. 
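For reference, a small sketch of the unsigned range check that makes the cached copy unnecessary; softexpires stands in for what ktime_to_ns(hrtimer_get_softexpires(...)) reports on the live timer:

  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>

  /* "already armed somewhere in [expires, expires + delta_ns]" in a single
   * unsigned comparison: if softexpires < expires the subtraction wraps to
   * a huge value and the timer gets reprogrammed */
  static bool can_skip_reprogram(uint64_t softexpires, uint64_t expires,
                                 uint64_t delta_ns)
  {
      return softexpires - expires <= delta_ns;
  }

  int main(void)
  {
      printf("%d %d %d\n",
             can_skip_reprogram(1050, 1000, 100),  /* inside the window: skip */
             can_skip_reprogram(1200, 1000, 100),  /* past the window: rearm */
             can_skip_reprogram(900, 1000, 100));  /* earlier: rearm */
      return 0;
  }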
Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20230308182648.1150762-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/pkt_sched.h | 1 - net/sched/sch_api.c | 6 ++++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 2016839991a42..bb0bd69fb655d 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -64,7 +64,6 @@ static inline psched_time_t psched_get_time(void) } struct qdisc_watchdog { - u64 last_expires; struct hrtimer timer; struct Qdisc *qdisc; }; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index aba789c30a2eb..fdb8f429333d2 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -639,14 +639,16 @@ void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires, return; if (hrtimer_is_queued(&wd->timer)) { + u64 softexpires; + + softexpires = ktime_to_ns(hrtimer_get_softexpires(&wd->timer)); /* If timer is already set in [expires, expires + delta_ns], * do not reprogram it. */ - if (wd->last_expires - expires <= delta_ns) + if (softexpires - expires <= delta_ns) return; } - wd->last_expires = expires; hrtimer_start_range_ns(&wd->timer, ns_to_ktime(expires), delta_ns, -- GitLab From 76b9bf965c98c9b53ef7420b3b11438dbd764f92 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 8 Mar 2023 11:23:13 +0200 Subject: [PATCH 0310/2078] neighbour: delete neigh_lookup_nodev as not used neigh_lookup_nodev isn't used in the kernel after removal of DECnet. So let's remove it. Fixes: 1202cdd66531 ("Remove DECnet support from kernel") Signed-off-by: Leon Romanovsky Reviewed-by: Eric Dumazet Reviewed-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/eb5656200d7964b2d177a36b77efa3c597d6d72d.1678267343.git.leonro@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/neighbour.h | 2 -- net/core/neighbour.c | 31 ------------------------------- 2 files changed, 33 deletions(-) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 2f2a6023fb0e5..234799ca527e0 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -336,8 +336,6 @@ void neigh_table_init(int index, struct neigh_table *tbl); int neigh_table_clear(int index, struct neigh_table *tbl); struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, struct net_device *dev); -struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, - const void *pkey); struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, struct net_device *dev, bool want_ref); static inline struct neighbour *neigh_create(struct neigh_table *tbl, diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 6798f6d2423b9..0116b0ff91a78 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -627,37 +627,6 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, } EXPORT_SYMBOL(neigh_lookup); -struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, - const void *pkey) -{ - struct neighbour *n; - unsigned int key_len = tbl->key_len; - u32 hash_val; - struct neigh_hash_table *nht; - - NEIGH_CACHE_STAT_INC(tbl, lookups); - - rcu_read_lock_bh(); - nht = rcu_dereference_bh(tbl->nht); - hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift); - - for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]); - n != NULL; - n = rcu_dereference_bh(n->next)) { - if (!memcmp(n->primary_key, pkey, key_len) && - net_eq(dev_net(n->dev), net)) { - if (!refcount_inc_not_zero(&n->refcnt)) - n = 
NULL; - NEIGH_CACHE_STAT_INC(tbl, hits); - break; - } - } - - rcu_read_unlock_bh(); - return n; -} -EXPORT_SYMBOL(neigh_lookup_nodev); - static struct neighbour * ___neigh_create(struct neigh_table *tbl, const void *pkey, struct net_device *dev, u32 flags, -- GitLab From b3a8df9f27c094dd41fe88b7b97f64d8dcb7d696 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 8 Mar 2023 14:04:52 +0100 Subject: [PATCH 0311/2078] net: ethernet: ti: am65-cpsw: Convert to devm_of_phy_optional_get() Use the new devm_of_phy_optional_get() helper instead of open-coding the same operation. Signed-off-by: Geert Uytterhoeven Reviewed-by: Siddharth Vadapalli Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/01605ea233ff7fc09bb0ea34fc8126af73db83f9.1678280599.git.geert+renesas@glider.be Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 4e3861c47708c..25996826edabc 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -1470,11 +1470,9 @@ static int am65_cpsw_init_serdes_phy(struct device *dev, struct device_node *por struct phy *phy; int ret; - phy = devm_of_phy_get(dev, port_np, name); - if (PTR_ERR(phy) == -ENODEV) - return 0; - if (IS_ERR(phy)) - return PTR_ERR(phy); + phy = devm_of_phy_optional_get(dev, port_np, name); + if (IS_ERR_OR_NULL(phy)) + return PTR_ERR_OR_ZERO(phy); /* Serdes PHY exists. Store it. */ port->slave.serdes_phy = phy; -- GitLab From fd9c31f834416e3060061dbcb45ababaa25cdfe2 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Wed, 8 Mar 2023 10:11:53 +0800 Subject: [PATCH 0312/2078] udp: introduce __sk_mem_schedule() usage Keep the accounting schema consistent across different protocols with __sk_mem_schedule(). It also slightly adjusts how forward-allocated memory is calculated compared to before. With this patch applied, the receive path avoids scheduling an extra amount of memory.
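A back-of-the-envelope sketch (user space, 4K pages and round numbers assumed) of the difference: the old path charged whole pages for the full skb size even when most of it was already covered by sk_forward_alloc, while the new path only covers the shortfall:

  #include <stdio.h>

  #define PAGE_SZ 4096

  static int pages_for(int bytes)  /* rough stand-in for sk_mem_pages() */
  {
      return (bytes + PAGE_SZ - 1) / PAGE_SZ;
  }

  int main(void)
  {
      int forward_alloc = 8000, size = 9000;

      /* old: schedule pages for the whole skb size */
      int old_charge = pages_for(size) * PAGE_SZ;

      /* new: schedule pages only for what forward_alloc cannot cover */
      int delta = size - forward_alloc;
      int new_charge = delta > 0 ? pages_for(delta) * PAGE_SZ : 0;

      printf("old charges %d bytes, new charges %d bytes\n",
             old_charge, new_charge);
      return 0;
  }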
Link: https://lore.kernel.org/lkml/20230221110344.82818-1-kerneljasonxing@gmail.com/ Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20230308021153.99777-1-kerneljasonxing@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/udp.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c605d171eb2d9..dc8feb54d835f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1531,10 +1531,21 @@ static void busylock_release(spinlock_t *busy) spin_unlock(busy); } +static int udp_rmem_schedule(struct sock *sk, int size) +{ + int delta; + + delta = size - sk->sk_forward_alloc; + if (delta > 0 && !__sk_mem_schedule(sk, delta, SK_MEM_RECV)) + return -ENOBUFS; + + return 0; +} + int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) { struct sk_buff_head *list = &sk->sk_receive_queue; - int rmem, delta, amt, err = -ENOMEM; + int rmem, err = -ENOMEM; spinlock_t *busy = NULL; int size; @@ -1567,16 +1578,10 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) goto uncharge_drop; spin_lock(&list->lock); - if (size >= sk->sk_forward_alloc) { - amt = sk_mem_pages(size); - delta = amt << PAGE_SHIFT; - if (!__sk_mem_raise_allocated(sk, delta, amt, SK_MEM_RECV)) { - err = -ENOBUFS; - spin_unlock(&list->lock); - goto uncharge_drop; - } - - sk->sk_forward_alloc += delta; + err = udp_rmem_schedule(sk, size); + if (err) { + spin_unlock(&list->lock); + goto uncharge_drop; } sk->sk_forward_alloc -= size; -- GitLab From a1331535aeb41b08fe0c2c78af51885edc93615b Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 16:10:51 -0600 Subject: [PATCH 0313/2078] net: restore alpha order to Ethernet devices in config The filename "wangxun" sorts between "intel" and "xscale", but xscale/Kconfig contains "Intel XScale" prompts, so Wangxun ends up in the wrong place in the config front-ends. Move wangxun/Kconfig so the Wangxun devices appear in order in the user interface. 
Fixes: 3ce7547e5b71 ("net: txgbe: Add build support for txgbe") Signed-off-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20230307221051.890135-1-helgaas@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index 1917da7841919..5a274b99f2992 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -84,7 +84,6 @@ source "drivers/net/ethernet/huawei/Kconfig" source "drivers/net/ethernet/i825xx/Kconfig" source "drivers/net/ethernet/ibm/Kconfig" source "drivers/net/ethernet/intel/Kconfig" -source "drivers/net/ethernet/wangxun/Kconfig" source "drivers/net/ethernet/xscale/Kconfig" config JME @@ -189,6 +188,7 @@ source "drivers/net/ethernet/toshiba/Kconfig" source "drivers/net/ethernet/tundra/Kconfig" source "drivers/net/ethernet/vertexcom/Kconfig" source "drivers/net/ethernet/via/Kconfig" +source "drivers/net/ethernet/wangxun/Kconfig" source "drivers/net/ethernet/wiznet/Kconfig" source "drivers/net/ethernet/xilinx/Kconfig" source "drivers/net/ethernet/xircom/Kconfig" -- GitLab From 2498e6231bfd44f8f85afbc838b37441551a4028 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 8 Mar 2023 21:40:12 -0800 Subject: [PATCH 0314/2078] selftests/bpf: prevent unused variable warning in bpf_for() Add __attribute__((unused)) to inner __p variable inside bpf_for(), bpf_for_each(), and bpf_repeat() macros to avoid compiler warnings about unused variable. Reported-by: Tejun Heo Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230309054015.4068562-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/bpf_misc.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index 43b154a639e78..c95eb603403cc 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -115,7 +115,8 @@ extern void bpf_iter_num_destroy(struct bpf_iter_num *it) __ksym; struct bpf_iter_##type ___it __attribute__((aligned(8), /* enforce, just in case */, \ cleanup(bpf_iter_##type##_destroy))), \ /* ___p pointer is just to call bpf_iter_##type##_new() *once* to init ___it */ \ - *___p = (bpf_iter_##type##_new(&___it, ##args), \ + *___p __attribute__((unused)) = ( \ + bpf_iter_##type##_new(&___it, ##args), \ /* this is a workaround for Clang bug: it currently doesn't emit BTF */ \ /* for bpf_iter_##type##_destroy() when used from cleanup() attribute */ \ (void)bpf_iter_##type##_destroy, (void *)0); \ @@ -143,7 +144,8 @@ extern void bpf_iter_num_destroy(struct bpf_iter_num *it) __ksym; struct bpf_iter_num ___it __attribute__((aligned(8), /* enforce, just in case */ \ cleanup(bpf_iter_num_destroy))), \ /* ___p pointer is necessary to call bpf_iter_num_new() *once* to init ___it */ \ - *___p = (bpf_iter_num_new(&___it, (start), (end)), \ + *___p __attribute__((unused)) = ( \ + bpf_iter_num_new(&___it, (start), (end)), \ /* this is a workaround for Clang bug: it currently doesn't emit BTF */ \ /* for bpf_iter_num_destroy() when used from cleanup() attribute */ \ (void)bpf_iter_num_destroy, (void *)0); \ @@ -167,7 +169,8 @@ extern void bpf_iter_num_destroy(struct bpf_iter_num *it) __ksym; struct bpf_iter_num ___it __attribute__((aligned(8), /* enforce, just in case */ \ cleanup(bpf_iter_num_destroy))), \ /* ___p pointer is necessary to call bpf_iter_num_new() 
*once* to init ___it */ \ - *___p = (bpf_iter_num_new(&___it, 0, (N)), \ + *___p __attribute__((unused)) = ( \ + bpf_iter_num_new(&___it, 0, (N)), \ /* this is a workaround for Clang bug: it currently doesn't emit BTF */ \ /* for bpf_iter_num_destroy() when used from cleanup() attribute */ \ (void)bpf_iter_num_destroy, (void *)0); \ -- GitLab From 713461b895ef958ef444b00cc2d979f3ca3a82e2 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 8 Mar 2023 21:40:13 -0800 Subject: [PATCH 0315/2078] selftests/bpf: add __sink() macro to fake variable consumption Add a __sink(expr) macro that forces the compiler to believe that the passed-in expression is both read and written. It uses a simple embedded asm for this. This is useful in a lot of tests where we assign a value to some variable to trigger some action, but never read the variable afterwards, causing the compiler to complain (if the corresponding compiler warnings are turned on, which we'll do in the next patch). Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230309054015.4068562-3-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/bpf_misc.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index c95eb603403cc..3c03ec8056ce7 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -76,6 +76,9 @@ #define FUNC_REG_ARG_CNT 5 #endif +/* make it look to compiler like value is read and written */ +#define __sink(expr) asm volatile("" : "+g"(expr)) + struct bpf_iter_num; extern int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) __ksym; -- GitLab From c8ed66859397237c649998c58a68a86b8ea5f417 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 8 Mar 2023 21:40:14 -0800 Subject: [PATCH 0316/2078] selftests/bpf: fix lots of silly mistakes pointed out by compiler Once we enable -Wall for BPF sources, the compiler will complain about lots of unused variables, variables that are set but never read, etc. Fix all these issues first before enabling -Wall in the Makefile.
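A minimal usage sketch, for illustration only and not part of any patch here: this is the pattern the fixes below rely on, keeping a deliberately unread value "alive" under -Wall by feeding it to the __sink() macro added to bpf_misc.h above. The program name and attach point are made up.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"

SEC("tp/syscalls/sys_enter_nanosleep")
int sink_demo(void *ctx)
{
	/* 'id' is assigned only to exercise the helper; without __sink()
	 * a -Wall build would likely flag it as unused / set but never read.
	 */
	__u64 id = bpf_get_current_pid_tgid();

	__sink(id);
	return 0;
}

char _license[] SEC("license") = "GPL";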
Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230309054015.4068562-4-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/bpf_iter_ksym.c | 1 - .../selftests/bpf/progs/bpf_iter_setsockopt.c | 1 - tools/testing/selftests/bpf/progs/bpf_loop.c | 2 - tools/testing/selftests/bpf/progs/cb_refs.c | 1 - .../bpf/progs/cgroup_skb_sk_lookup_kern.c | 1 - .../selftests/bpf/progs/cgrp_kfunc_failure.c | 1 + .../bpf/progs/cgrp_ls_attach_cgroup.c | 1 - .../selftests/bpf/progs/cgrp_ls_sleepable.c | 1 - tools/testing/selftests/bpf/progs/core_kern.c | 2 +- .../selftests/bpf/progs/cpumask_failure.c | 3 ++ .../selftests/bpf/progs/cpumask_success.c | 1 - .../testing/selftests/bpf/progs/dynptr_fail.c | 5 ++- .../selftests/bpf/progs/dynptr_success.c | 5 +-- .../selftests/bpf/progs/fexit_bpf2bpf.c | 2 - .../bpf/progs/freplace_attach_probe.c | 2 +- tools/testing/selftests/bpf/progs/iters.c | 11 +++-- .../selftests/bpf/progs/linked_funcs1.c | 3 ++ .../selftests/bpf/progs/linked_funcs2.c | 3 ++ .../testing/selftests/bpf/progs/linked_list.c | 4 -- .../selftests/bpf/progs/linked_list_fail.c | 1 - .../selftests/bpf/progs/local_storage.c | 1 - tools/testing/selftests/bpf/progs/map_kptr.c | 3 -- .../testing/selftests/bpf/progs/netcnt_prog.c | 1 - .../selftests/bpf/progs/netif_receive_skb.c | 1 - .../selftests/bpf/progs/perfbuf_bench.c | 1 - tools/testing/selftests/bpf/progs/pyperf.h | 2 +- .../progs/rbtree_btf_fail__wrong_node_type.c | 11 ----- .../testing/selftests/bpf/progs/rbtree_fail.c | 3 +- .../selftests/bpf/progs/rcu_read_lock.c | 4 -- .../bpf/progs/read_bpf_task_storage_busy.c | 1 - .../selftests/bpf/progs/recvmsg4_prog.c | 2 - .../selftests/bpf/progs/recvmsg6_prog.c | 2 - .../selftests/bpf/progs/sendmsg4_prog.c | 2 - .../bpf/progs/sockmap_verdict_prog.c | 4 ++ .../testing/selftests/bpf/progs/strobemeta.h | 1 - .../selftests/bpf/progs/tailcall_bpf2bpf3.c | 11 +++++ .../selftests/bpf/progs/tailcall_bpf2bpf6.c | 3 ++ .../selftests/bpf/progs/task_kfunc_failure.c | 1 + .../selftests/bpf/progs/task_kfunc_success.c | 6 --- .../testing/selftests/bpf/progs/test_bpf_nf.c | 1 - .../bpf/progs/test_cls_redirect_dynptr.c | 1 - .../progs/test_core_reloc_bitfields_probed.c | 1 - .../selftests/bpf/progs/test_global_func1.c | 4 ++ .../selftests/bpf/progs/test_global_func2.c | 4 ++ .../selftests/bpf/progs/test_hash_large_key.c | 2 +- .../bpf/progs/test_ksyms_btf_write_check.c | 1 - .../selftests/bpf/progs/test_legacy_printk.c | 2 +- .../selftests/bpf/progs/test_map_lock.c | 2 +- .../testing/selftests/bpf/progs/test_obj_id.c | 2 + .../bpf/progs/test_parse_tcp_hdr_opt.c | 1 - .../bpf/progs/test_parse_tcp_hdr_opt_dynptr.c | 2 +- .../selftests/bpf/progs/test_pkt_access.c | 5 +++ .../selftests/bpf/progs/test_ringbuf.c | 1 - .../bpf/progs/test_ringbuf_map_key.c | 1 + .../selftests/bpf/progs/test_ringbuf_multi.c | 1 - .../bpf/progs/test_select_reuseport_kern.c | 2 +- .../selftests/bpf/progs/test_sk_assign.c | 4 +- .../selftests/bpf/progs/test_sk_lookup.c | 9 +--- .../selftests/bpf/progs/test_sk_lookup_kern.c | 2 - .../selftests/bpf/progs/test_sock_fields.c | 2 +- .../selftests/bpf/progs/test_sockmap_kern.h | 14 ++++-- .../selftests/bpf/progs/test_spin_lock.c | 3 ++ .../selftests/bpf/progs/test_tc_dtime.c | 4 +- .../selftests/bpf/progs/test_tc_neigh.c | 4 +- .../selftests/bpf/progs/test_tcpbpf_kern.c | 2 - .../selftests/bpf/progs/test_tunnel_kern.c | 6 --- .../selftests/bpf/progs/test_usdt_multispec.c | 2 - .../selftests/bpf/progs/test_verif_scale1.c | 2 +- 
.../selftests/bpf/progs/test_verif_scale2.c | 2 +- .../selftests/bpf/progs/test_verif_scale3.c | 2 +- .../bpf/progs/test_xdp_adjust_tail_grow.c | 2 - .../selftests/bpf/progs/test_xdp_bpf2bpf.c | 2 - .../selftests/bpf/progs/test_xdp_dynptr.c | 2 - .../selftests/bpf/progs/test_xdp_noinline.c | 43 ------------------- .../selftests/bpf/progs/test_xdp_vlan.c | 13 ------ tools/testing/selftests/bpf/progs/type_cast.c | 1 - tools/testing/selftests/bpf/progs/udp_limit.c | 2 - .../bpf/progs/user_ringbuf_success.c | 6 --- .../selftests/bpf/progs/xdp_features.c | 1 - .../testing/selftests/bpf/progs/xdping_kern.c | 2 - tools/testing/selftests/bpf/progs/xdpwall.c | 1 - 81 files changed, 90 insertions(+), 187 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c b/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c index 9ba14c37bbcc9..5ddcc46fd8868 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c @@ -33,7 +33,6 @@ int dump_ksym(struct bpf_iter__ksym *ctx) __u32 seq_num = ctx->meta->seq_num; unsigned long value; char type; - int ret; if (!iter) return 0; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c index b77adfd55d732..ec7f91850dec9 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c @@ -42,7 +42,6 @@ int change_tcp_cc(struct bpf_iter__tcp *ctx) char cur_cc[TCP_CA_NAME_MAX]; struct tcp_sock *tp; struct sock *sk; - int ret; if (!bpf_tcp_sk(ctx->sk_common)) return 0; diff --git a/tools/testing/selftests/bpf/progs/bpf_loop.c b/tools/testing/selftests/bpf/progs/bpf_loop.c index de1fc82d27105..1d194455b109c 100644 --- a/tools/testing/selftests/bpf/progs/bpf_loop.c +++ b/tools/testing/selftests/bpf/progs/bpf_loop.c @@ -138,8 +138,6 @@ static int callback_set_0f(int i, void *ctx) SEC("fentry/" SYS_PREFIX "sys_nanosleep") int prog_non_constant_callback(void *ctx) { - struct callback_ctx data = {}; - if (bpf_get_current_pid_tgid() >> 32 != pid) return 0; diff --git a/tools/testing/selftests/bpf/progs/cb_refs.c b/tools/testing/selftests/bpf/progs/cb_refs.c index ce96b33e38d66..50f95ec611650 100644 --- a/tools/testing/selftests/bpf/progs/cb_refs.c +++ b/tools/testing/selftests/bpf/progs/cb_refs.c @@ -52,7 +52,6 @@ int leak_prog(void *ctx) { struct prog_test_ref_kfunc *p; struct map_value *v; - unsigned long sl; v = bpf_map_lookup_elem(&array_map, &(int){0}); if (!v) diff --git a/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c index 88638315c582c..ac86a8a616057 100644 --- a/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c +++ b/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c @@ -66,7 +66,6 @@ static inline int is_allowed_peer_cg(struct __sk_buff *skb, SEC("cgroup_skb/ingress") int ingress_lookup(struct __sk_buff *skb) { - __u32 serv_port_key = 0; struct ipv6hdr ip6h; struct tcphdr tcph; diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c index b42291ed95869..807fb0ac41e9c 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c @@ -109,6 +109,7 @@ int BPF_PROG(cgrp_kfunc_acquire_unreleased, struct cgroup *cgrp, const char *pat acquired = bpf_cgroup_acquire(cgrp); /* Acquired cgroup is never released. 
*/ + __sink(acquired); return 0; } diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_attach_cgroup.c b/tools/testing/selftests/bpf/progs/cgrp_ls_attach_cgroup.c index 6652d18465b24..8aeba1b75c838 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_ls_attach_cgroup.c +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_attach_cgroup.c @@ -84,7 +84,6 @@ int BPF_PROG(update_cookie_tracing, struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { struct socket_cookie *p; - struct tcp_sock *tcp_sk; if (uaddr->sa_family != AF_INET6) return 0; diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c index 7615dc23d301e..4c7844e1dbfa5 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c @@ -24,7 +24,6 @@ void bpf_rcu_read_unlock(void) __ksym; SEC("?iter.s/cgroup") int cgroup_iter(struct bpf_iter__cgroup *ctx) { - struct seq_file *seq = ctx->meta->seq; struct cgroup *cgrp = ctx->cgroup; long *ptr; diff --git a/tools/testing/selftests/bpf/progs/core_kern.c b/tools/testing/selftests/bpf/progs/core_kern.c index 2715fe27d4cf8..004f2acef2eb0 100644 --- a/tools/testing/selftests/bpf/progs/core_kern.c +++ b/tools/testing/selftests/bpf/progs/core_kern.c @@ -77,7 +77,7 @@ int balancer_ingress(struct __sk_buff *ctx) void *data_end = (void *)(long)ctx->data_end; void *data = (void *)(long)ctx->data; void *ptr; - int ret = 0, nh_off, i = 0; + int nh_off, i = 0; nh_off = 14; diff --git a/tools/testing/selftests/bpf/progs/cpumask_failure.c b/tools/testing/selftests/bpf/progs/cpumask_failure.c index c16f7563b84ec..cfe83f0ef9e25 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_failure.c +++ b/tools/testing/selftests/bpf/progs/cpumask_failure.c @@ -23,6 +23,7 @@ int BPF_PROG(test_alloc_no_release, struct task_struct *task, u64 clone_flags) struct bpf_cpumask *cpumask; cpumask = create_cpumask(); + __sink(cpumask); /* cpumask is never released. */ return 0; @@ -51,6 +52,7 @@ int BPF_PROG(test_acquire_wrong_cpumask, struct task_struct *task, u64 clone_fla /* Can't acquire a non-struct bpf_cpumask. */ cpumask = bpf_cpumask_acquire((struct bpf_cpumask *)task->cpus_ptr); + __sink(cpumask); return 0; } @@ -63,6 +65,7 @@ int BPF_PROG(test_mutate_cpumask, struct task_struct *task, u64 clone_flags) /* Can't set the CPU of a non-struct bpf_cpumask. 
*/ bpf_cpumask_set_cpu(0, (struct bpf_cpumask *)task->cpus_ptr); + __sink(cpumask); return 0; } diff --git a/tools/testing/selftests/bpf/progs/cpumask_success.c b/tools/testing/selftests/bpf/progs/cpumask_success.c index 1d38bc65d4b0e..97ed08c4ff03c 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_success.c +++ b/tools/testing/selftests/bpf/progs/cpumask_success.c @@ -353,7 +353,6 @@ SEC("tp_btf/task_newtask") int BPF_PROG(test_insert_leave, struct task_struct *task, u64 clone_flags) { struct bpf_cpumask *cpumask; - struct __cpumask_map_value *v; cpumask = create_cpumask(); if (!cpumask) diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c index 20ce920d891da..759eb5c245cd9 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_fail.c +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -271,7 +271,7 @@ SEC("?raw_tp") __failure __msg("value is outside of the allowed memory range") int data_slice_out_of_bounds_map_value(void *ctx) { - __u32 key = 0, map_val; + __u32 map_val; struct bpf_dynptr ptr; void *data; @@ -388,7 +388,6 @@ int data_slice_missing_null_check2(void *ctx) /* this should fail */ *data2 = 3; -done: bpf_ringbuf_discard_dynptr(&ptr, 0); return 0; } @@ -440,6 +439,7 @@ int invalid_write1(void *ctx) /* this should fail */ data = bpf_dynptr_data(&ptr, 0, 1); + __sink(data); return 0; } @@ -1374,6 +1374,7 @@ int invalid_slice_rdwr_rdonly(struct __sk_buff *skb) * changing packet data */ hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer)); + __sink(hdr); return 0; } diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c index c8358a7c79245..b2fa6c47ecc0b 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_success.c +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -35,7 +35,7 @@ SEC("?tp/syscalls/sys_enter_nanosleep") int test_read_write(void *ctx) { char write_data[64] = "hello there, world!!"; - char read_data[64] = {}, buf[64] = {}; + char read_data[64] = {}; struct bpf_dynptr ptr; int i; @@ -170,7 +170,6 @@ int test_skb_readonly(struct __sk_buff *skb) { __u8 write_data[2] = {1, 2}; struct bpf_dynptr ptr; - __u64 *data; int ret; if (bpf_dynptr_from_skb(skb, 0, &ptr)) { @@ -191,10 +190,8 @@ int test_skb_readonly(struct __sk_buff *skb) SEC("?cgroup_skb/egress") int test_dynptr_skb_data(struct __sk_buff *skb) { - __u8 write_data[2] = {1, 2}; struct bpf_dynptr ptr; __u64 *data; - int ret; if (bpf_dynptr_from_skb(skb, 0, &ptr)) { err = 1; diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c index 4547b059d4877..983b7c2333828 100644 --- a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c @@ -120,8 +120,6 @@ int new_get_skb_ifindex(int val, struct __sk_buff *skb, int var) void *data = (void *)(long)skb->data; struct ipv6hdr ip6, *ip6p; int ifindex = skb->ifindex; - __u32 eth_proto; - __u32 nh_off; /* check that BPF extension can read packet via direct packet access */ if (data + 14 + sizeof(ip6) > data_end) diff --git a/tools/testing/selftests/bpf/progs/freplace_attach_probe.c b/tools/testing/selftests/bpf/progs/freplace_attach_probe.c index bb2a77c5b62b0..370a0e1922e0e 100644 --- a/tools/testing/selftests/bpf/progs/freplace_attach_probe.c +++ b/tools/testing/selftests/bpf/progs/freplace_attach_probe.c @@ -23,7 +23,7 @@ struct { SEC("freplace/handle_kprobe") int new_handle_kprobe(struct pt_regs *ctx) { - struct hmap_elem 
zero = {}, *val; + struct hmap_elem *val; int key = 0; val = bpf_map_lookup_elem(&hash_map, &key); diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index 84e5dc10243ca..6b9b3c56f009a 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -45,7 +45,6 @@ __failure __msg("unbounded memory access") int iter_err_unsafe_asm_loop(const void *ctx) { struct bpf_iter_num it; - int *v, i = 0; MY_PID_GUARD(); @@ -88,7 +87,7 @@ __success int iter_while_loop(const void *ctx) { struct bpf_iter_num it; - int *v, i; + int *v; MY_PID_GUARD(); @@ -106,7 +105,7 @@ __success int iter_while_loop_auto_cleanup(const void *ctx) { __attribute__((cleanup(bpf_iter_num_destroy))) struct bpf_iter_num it; - int *v, i; + int *v; MY_PID_GUARD(); @@ -124,7 +123,7 @@ __success int iter_for_loop(const void *ctx) { struct bpf_iter_num it; - int *v, i; + int *v; MY_PID_GUARD(); @@ -192,7 +191,7 @@ __success int iter_manual_unroll_loop(const void *ctx) { struct bpf_iter_num it; - int *v, i; + int *v; MY_PID_GUARD(); @@ -621,7 +620,7 @@ __success int iter_stack_array_loop(const void *ctx) { long arr1[16], arr2[16], sum = 0; - int *v, i; + int i; MY_PID_GUARD(); diff --git a/tools/testing/selftests/bpf/progs/linked_funcs1.c b/tools/testing/selftests/bpf/progs/linked_funcs1.c index b05571bc67d5b..c4b49ceea967b 100644 --- a/tools/testing/selftests/bpf/progs/linked_funcs1.c +++ b/tools/testing/selftests/bpf/progs/linked_funcs1.c @@ -5,6 +5,7 @@ #include #include #include +#include "bpf_misc.h" /* weak and shared between two files */ const volatile int my_tid __weak; @@ -51,6 +52,7 @@ __weak int set_output_weak(int x) * cause problems for BPF static linker */ whatever = bpf_core_type_size(struct task_struct); + __sink(whatever); output_weak1 = x; return x; @@ -71,6 +73,7 @@ int BPF_PROG(handler1, struct pt_regs *regs, long id) /* make sure we have CO-RE relocations in main program */ whatever = bpf_core_type_size(struct task_struct); + __sink(whatever); set_output_val2(1000); set_output_ctx2(ctx); /* ctx definition is hidden in BPF_PROG macro */ diff --git a/tools/testing/selftests/bpf/progs/linked_funcs2.c b/tools/testing/selftests/bpf/progs/linked_funcs2.c index ee7e3848ee4fe..013ff0645f0cc 100644 --- a/tools/testing/selftests/bpf/progs/linked_funcs2.c +++ b/tools/testing/selftests/bpf/progs/linked_funcs2.c @@ -5,6 +5,7 @@ #include #include #include +#include "bpf_misc.h" /* weak and shared between both files */ const volatile int my_tid __weak; @@ -51,6 +52,7 @@ __weak int set_output_weak(int x) * cause problems for BPF static linker */ whatever = 2 * bpf_core_type_size(struct task_struct); + __sink(whatever); output_weak2 = x; return 2 * x; @@ -71,6 +73,7 @@ int BPF_PROG(handler2, struct pt_regs *regs, long id) /* make sure we have CO-RE relocations in main program */ whatever = bpf_core_type_size(struct task_struct); + __sink(whatever); set_output_val1(2000); set_output_ctx1(ctx); /* ctx definition is hidden in BPF_PROG macro */ diff --git a/tools/testing/selftests/bpf/progs/linked_list.c b/tools/testing/selftests/bpf/progs/linked_list.c index 4fa4a9b01bde3..53ded51a3abbc 100644 --- a/tools/testing/selftests/bpf/progs/linked_list.c +++ b/tools/testing/selftests/bpf/progs/linked_list.c @@ -313,7 +313,6 @@ SEC("tc") int map_list_push_pop_multiple(void *ctx) { struct map_value *v; - int ret; v = bpf_map_lookup_elem(&array_map, &(int){0}); if (!v) @@ -326,7 +325,6 @@ int inner_map_list_push_pop_multiple(void *ctx) { struct map_value 
*v; void *map; - int ret; map = bpf_map_lookup_elem(&map_of_maps, &(int){0}); if (!map) @@ -352,7 +350,6 @@ SEC("tc") int map_list_in_list(void *ctx) { struct map_value *v; - int ret; v = bpf_map_lookup_elem(&array_map, &(int){0}); if (!v) @@ -365,7 +362,6 @@ int inner_map_list_in_list(void *ctx) { struct map_value *v; void *map; - int ret; map = bpf_map_lookup_elem(&map_of_maps, &(int){0}); if (!map) diff --git a/tools/testing/selftests/bpf/progs/linked_list_fail.c b/tools/testing/selftests/bpf/progs/linked_list_fail.c index 69cdc07cba13a..41978b46f58ee 100644 --- a/tools/testing/selftests/bpf/progs/linked_list_fail.c +++ b/tools/testing/selftests/bpf/progs/linked_list_fail.c @@ -557,7 +557,6 @@ SEC("?tc") int incorrect_head_off2(void *ctx) { struct foo *f; - struct bar *b; f = bpf_obj_new(typeof(*f)); if (!f) diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c index 19423ed862e35..01c74bc870aea 100644 --- a/tools/testing/selftests/bpf/progs/local_storage.c +++ b/tools/testing/selftests/bpf/progs/local_storage.c @@ -77,7 +77,6 @@ int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { - __u32 pid = bpf_get_current_pid_tgid() >> 32; struct local_storage *storage; int err; diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c index 3903d30217b8f..dae5dab1bbf7d 100644 --- a/tools/testing/selftests/bpf/progs/map_kptr.c +++ b/tools/testing/selftests/bpf/progs/map_kptr.c @@ -515,7 +515,6 @@ int test_ls_map_kptr_ref1(void *ctx) { struct task_struct *current; struct map_value *v; - int ret; current = bpf_get_current_task_btf(); if (!current) @@ -534,7 +533,6 @@ int test_ls_map_kptr_ref2(void *ctx) { struct task_struct *current; struct map_value *v; - int ret; current = bpf_get_current_task_btf(); if (!current) @@ -550,7 +548,6 @@ int test_ls_map_kptr_ref_del(void *ctx) { struct task_struct *current; struct map_value *v; - int ret; current = bpf_get_current_task_btf(); if (!current) diff --git a/tools/testing/selftests/bpf/progs/netcnt_prog.c b/tools/testing/selftests/bpf/progs/netcnt_prog.c index f718b2c212dc8..f9ef8aee56f16 100644 --- a/tools/testing/selftests/bpf/progs/netcnt_prog.c +++ b/tools/testing/selftests/bpf/progs/netcnt_prog.c @@ -26,7 +26,6 @@ SEC("cgroup/skb") int bpf_nextcnt(struct __sk_buff *skb) { union percpu_net_cnt *percpu_cnt; - char fmt[] = "%d %llu %llu\n"; union net_cnt *cnt; __u64 ts, dt; int ret; diff --git a/tools/testing/selftests/bpf/progs/netif_receive_skb.c b/tools/testing/selftests/bpf/progs/netif_receive_skb.c index 1d8918dfbd3ff..c0062645fc68b 100644 --- a/tools/testing/selftests/bpf/progs/netif_receive_skb.c +++ b/tools/testing/selftests/bpf/progs/netif_receive_skb.c @@ -53,7 +53,6 @@ static int __strncmp(const void *m1, const void *m2, size_t len) do { \ static const char _expectedval[EXPECTED_STRSIZE] = \ _expected; \ - static const char _ptrtype[64] = #_type; \ __u64 _hflags = _flags | BTF_F_COMPACT; \ static _type _ptrdata = __VA_ARGS__; \ static struct btf_ptr _ptr = { }; \ diff --git a/tools/testing/selftests/bpf/progs/perfbuf_bench.c b/tools/testing/selftests/bpf/progs/perfbuf_bench.c index 45204fe0c570b..29c1639fc78a0 100644 --- a/tools/testing/selftests/bpf/progs/perfbuf_bench.c +++ b/tools/testing/selftests/bpf/progs/perfbuf_bench.c @@ -22,7 +22,6 @@ long dropped __attribute__((aligned(128))) = 0; SEC("fentry/" SYS_PREFIX "sys_getpgid") int 
bench_perfbuf(void *ctx) { - __u64 *sample; int i; for (i = 0; i < batch_cnt; i++) { diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h index f2e7a31c8d751..026d573ce1797 100644 --- a/tools/testing/selftests/bpf/progs/pyperf.h +++ b/tools/testing/selftests/bpf/progs/pyperf.h @@ -345,7 +345,7 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx) SEC("raw_tracepoint/kfree_skb") int on_event(struct bpf_raw_tracepoint_args* ctx) { - int i, ret = 0; + int ret = 0; ret |= __on_event(ctx); ret |= __on_event(ctx); ret |= __on_event(ctx); diff --git a/tools/testing/selftests/bpf/progs/rbtree_btf_fail__wrong_node_type.c b/tools/testing/selftests/bpf/progs/rbtree_btf_fail__wrong_node_type.c index 340f97da1084f..7651843f5a800 100644 --- a/tools/testing/selftests/bpf/progs/rbtree_btf_fail__wrong_node_type.c +++ b/tools/testing/selftests/bpf/progs/rbtree_btf_fail__wrong_node_type.c @@ -16,17 +16,6 @@ struct node_data { struct bpf_list_node node; }; -static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b) -{ - struct node_data *node_a; - struct node_data *node_b; - - node_a = container_of(a, struct node_data, node); - node_b = container_of(b, struct node_data, node); - - return node_a->key < node_b->key; -} - #define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8))) private(A) struct bpf_spin_lock glock; private(A) struct bpf_rb_root groot __contains(node_data, node); diff --git a/tools/testing/selftests/bpf/progs/rbtree_fail.c b/tools/testing/selftests/bpf/progs/rbtree_fail.c index 1ced900f3fced..46d7d18a218fd 100644 --- a/tools/testing/selftests/bpf/progs/rbtree_fail.c +++ b/tools/testing/selftests/bpf/progs/rbtree_fail.c @@ -105,7 +105,7 @@ long rbtree_api_remove_unadded_node(void *ctx) } SEC("?tc") -__failure __msg("Unreleased reference id=2 alloc_insn=11") +__failure __msg("Unreleased reference id=2 alloc_insn=10") long rbtree_api_remove_no_drop(void *ctx) { struct bpf_rb_node *res; @@ -119,6 +119,7 @@ long rbtree_api_remove_no_drop(void *ctx) res = bpf_rbtree_remove(&groot, res); n = container_of(res, struct node_data, node); + __sink(n); bpf_spin_unlock(&glock); /* bpf_obj_drop(n) is missing here */ diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c index 7250bb76d18ad..6a8c88e58df24 100644 --- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c +++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c @@ -179,8 +179,6 @@ SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") int miss_lock(void *ctx) { struct task_struct *task; - struct css_set *cgroups; - struct cgroup *dfl_cgrp; /* missing bpf_rcu_read_lock() */ task = bpf_get_current_task_btf(); @@ -195,8 +193,6 @@ SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") int miss_unlock(void *ctx) { struct task_struct *task; - struct css_set *cgroups; - struct cgroup *dfl_cgrp; /* missing bpf_rcu_read_unlock() */ task = bpf_get_current_task_btf(); diff --git a/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c b/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c index a47bb01207199..76556e0b42b24 100644 --- a/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c +++ b/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c @@ -23,7 +23,6 @@ SEC("raw_tp/sys_enter") int BPF_PROG(read_bpf_task_storage_busy) { int *value; - int key; if (!CONFIG_PREEMPT) return 0; diff --git a/tools/testing/selftests/bpf/progs/recvmsg4_prog.c b/tools/testing/selftests/bpf/progs/recvmsg4_prog.c index 
3d1ae8b3402f9..59748c95471a2 100644 --- a/tools/testing/selftests/bpf/progs/recvmsg4_prog.c +++ b/tools/testing/selftests/bpf/progs/recvmsg4_prog.c @@ -17,8 +17,6 @@ SEC("cgroup/recvmsg4") int recvmsg4_prog(struct bpf_sock_addr *ctx) { struct bpf_sock *sk; - __u32 user_ip4; - __u16 user_port; sk = ctx->sk; if (!sk) diff --git a/tools/testing/selftests/bpf/progs/recvmsg6_prog.c b/tools/testing/selftests/bpf/progs/recvmsg6_prog.c index 27dfb21b21b47..d9a4016596d51 100644 --- a/tools/testing/selftests/bpf/progs/recvmsg6_prog.c +++ b/tools/testing/selftests/bpf/progs/recvmsg6_prog.c @@ -20,8 +20,6 @@ SEC("cgroup/recvmsg6") int recvmsg6_prog(struct bpf_sock_addr *ctx) { struct bpf_sock *sk; - __u32 user_ip4; - __u16 user_port; sk = ctx->sk; if (!sk) diff --git a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c index ea75a44cb7fce..351e79aef2fae 100644 --- a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c +++ b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c @@ -21,8 +21,6 @@ SEC("cgroup/sendmsg4") int sendmsg_v4_prog(struct bpf_sock_addr *ctx) { - int prio; - if (ctx->type != SOCK_DGRAM) return 0; diff --git a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c index e2468a6d01a59..0660f29dca955 100644 --- a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c +++ b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c @@ -1,6 +1,7 @@ #include #include #include +#include "bpf_misc.h" struct { __uint(type, BPF_MAP_TYPE_SOCKMAP); @@ -40,6 +41,9 @@ int bpf_prog2(struct __sk_buff *skb) __u8 *d = data; __u8 sk, map; + __sink(lport); + __sink(rport); + if (data + 8 > data_end) return SK_DROP; diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h index e562be6356f30..e02cfd3807469 100644 --- a/tools/testing/selftests/bpf/progs/strobemeta.h +++ b/tools/testing/selftests/bpf/progs/strobemeta.h @@ -391,7 +391,6 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, struct strobe_map_raw map; void *location; uint64_t len; - int i; descr->tag_len = 0; /* presume no tag is set */ descr->cnt = -1; /* presume no value is set */ diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c index 7fab39a3bb12a..99c8d1d8a187b 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c @@ -2,6 +2,7 @@ #include #include #include "bpf_legacy.h" +#include "bpf_misc.h" struct { __uint(type, BPF_MAP_TYPE_PROG_ARRAY); @@ -20,6 +21,8 @@ int subprog_tail2(struct __sk_buff *skb) else bpf_tail_call_static(skb, &jmp_table, 1); + __sink(arr[sizeof(arr) - 1]); + return skb->len; } @@ -30,6 +33,8 @@ int subprog_tail(struct __sk_buff *skb) bpf_tail_call_static(skb, &jmp_table, 0); + __sink(arr[sizeof(arr) - 1]); + return skb->len * 2; } @@ -38,6 +43,8 @@ int classifier_0(struct __sk_buff *skb) { volatile char arr[128] = {}; + __sink(arr[sizeof(arr) - 1]); + return subprog_tail2(skb); } @@ -46,6 +53,8 @@ int classifier_1(struct __sk_buff *skb) { volatile char arr[128] = {}; + __sink(arr[sizeof(arr) - 1]); + return skb->len * 3; } @@ -54,6 +63,8 @@ int entry(struct __sk_buff *skb) { volatile char arr[128] = {}; + __sink(arr[sizeof(arr) - 1]); + return subprog_tail(skb); } diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf6.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf6.c index 
41ce83da78e8b..4a9f63bea66cd 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf6.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf6.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include "bpf_misc.h" #define __unused __attribute__((unused)) @@ -36,6 +37,8 @@ int entry(struct __sk_buff *skb) /* Have data on stack which size is not a multiple of 8 */ volatile char arr[1] = {}; + __sink(arr[0]); + return subprog_tail(skb); } diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c index f19d54eda4f17..002c7f69e47f5 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c +++ b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c @@ -109,6 +109,7 @@ int BPF_PROG(task_kfunc_acquire_unreleased, struct task_struct *task, u64 clone_ acquired = bpf_task_acquire(task); /* Acquired task is never released. */ + __sink(acquired); return 0; } diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_success.c b/tools/testing/selftests/bpf/progs/task_kfunc_success.c index 9f359cfd29e7c..aebc4bb14e7dd 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_success.c +++ b/tools/testing/selftests/bpf/progs/task_kfunc_success.c @@ -171,8 +171,6 @@ static void lookup_compare_pid(const struct task_struct *p) SEC("tp_btf/task_newtask") int BPF_PROG(test_task_from_pid_arg, struct task_struct *task, u64 clone_flags) { - struct task_struct *acquired; - if (!is_test_kfunc_task()) return 0; @@ -183,8 +181,6 @@ int BPF_PROG(test_task_from_pid_arg, struct task_struct *task, u64 clone_flags) SEC("tp_btf/task_newtask") int BPF_PROG(test_task_from_pid_current, struct task_struct *task, u64 clone_flags) { - struct task_struct *current, *acquired; - if (!is_test_kfunc_task()) return 0; @@ -208,8 +204,6 @@ static int is_pid_lookup_valid(s32 pid) SEC("tp_btf/task_newtask") int BPF_PROG(test_task_from_pid_invalid, struct task_struct *task, u64 clone_flags) { - struct task_struct *acquired; - if (!is_test_kfunc_task()) return 0; diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c index 9fc603c9d673e..77ad8adf68daa 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c @@ -75,7 +75,6 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32, struct bpf_ct_opts___local opts_def = { .l4proto = IPPROTO_TCP, .netns_id = -1 }; struct bpf_sock_tuple bpf_tuple; struct nf_conn *ct; - int err; __builtin_memset(&bpf_tuple, 0, sizeof(bpf_tuple.ipv4)); diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c index f45a7095de7a7..f41c81212ee92 100644 --- a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c @@ -455,7 +455,6 @@ static ret_t forward_to_next_hop(struct __sk_buff *skb, struct bpf_dynptr *dynpt static ret_t skip_next_hops(__u64 *offset, int n) { - __u32 res; switch (n) { case 1: *offset += sizeof(struct in_addr); diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c b/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c index ab1e647aeb317..b86fdda2a6ea5 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c @@ -42,7 +42,6 @@ int test_core_bitfields(void *ctx) { 
struct core_reloc_bitfields *in = (void *)&data.in; struct core_reloc_bitfields_output *out = (void *)&data.out; - uint64_t res; out->ub1 = BPF_CORE_READ_BITFIELD_PROBED(in, ub1); out->ub2 = BPF_CORE_READ_BITFIELD_PROBED(in, ub2); diff --git a/tools/testing/selftests/bpf/progs/test_global_func1.c b/tools/testing/selftests/bpf/progs/test_global_func1.c index 23970a20b3249..b85fc8c423ba7 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func1.c +++ b/tools/testing/selftests/bpf/progs/test_global_func1.c @@ -18,6 +18,8 @@ int f1(struct __sk_buff *skb) { volatile char buf[MAX_STACK] = {}; + __sink(buf[MAX_STACK - 1]); + return f0(0, skb) + skb->len; } @@ -34,6 +36,8 @@ int f3(int val, struct __sk_buff *skb, int var) { volatile char buf[MAX_STACK] = {}; + __sink(buf[MAX_STACK - 1]); + return skb->ifindex * val * var; } diff --git a/tools/testing/selftests/bpf/progs/test_global_func2.c b/tools/testing/selftests/bpf/progs/test_global_func2.c index 3dce97fb52a4b..2beab9c3b68a7 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func2.c +++ b/tools/testing/selftests/bpf/progs/test_global_func2.c @@ -18,6 +18,8 @@ int f1(struct __sk_buff *skb) { volatile char buf[MAX_STACK] = {}; + __sink(buf[MAX_STACK - 1]); + return f0(0, skb) + skb->len; } @@ -34,6 +36,8 @@ int f3(int val, struct __sk_buff *skb, int var) { volatile char buf[MAX_STACK] = {}; + __sink(buf[MAX_STACK - 1]); + return skb->ifindex * val * var; } diff --git a/tools/testing/selftests/bpf/progs/test_hash_large_key.c b/tools/testing/selftests/bpf/progs/test_hash_large_key.c index 473a22794a628..8b438128f46b8 100644 --- a/tools/testing/selftests/bpf/progs/test_hash_large_key.c +++ b/tools/testing/selftests/bpf/progs/test_hash_large_key.c @@ -28,7 +28,7 @@ struct bigelement { SEC("raw_tracepoint/sys_enter") int bpf_hash_large_key_test(void *ctx) { - int zero = 0, err = 1, value = 42; + int zero = 0, value = 42; struct bigelement *key; key = bpf_map_lookup_elem(&key_map, &zero); diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c b/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c index a72a5bf3812a8..27109b877714f 100644 --- a/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c +++ b/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c @@ -35,7 +35,6 @@ SEC("raw_tp/sys_enter") int handler2(const void *ctx) { int *active; - __u32 cpu; active = bpf_this_cpu_ptr(&bpf_prog_active); write_active(active); diff --git a/tools/testing/selftests/bpf/progs/test_legacy_printk.c b/tools/testing/selftests/bpf/progs/test_legacy_printk.c index 64c2d9ced529c..42718cd8e6a49 100644 --- a/tools/testing/selftests/bpf/progs/test_legacy_printk.c +++ b/tools/testing/selftests/bpf/progs/test_legacy_printk.c @@ -56,7 +56,7 @@ int handle_legacy(void *ctx) SEC("tp/raw_syscalls/sys_enter") int handle_modern(void *ctx) { - int zero = 0, cur_pid; + int cur_pid; cur_pid = bpf_get_current_pid_tgid() >> 32; if (cur_pid != my_pid_var) diff --git a/tools/testing/selftests/bpf/progs/test_map_lock.c b/tools/testing/selftests/bpf/progs/test_map_lock.c index acf073db9e8b9..1c02511b73cdb 100644 --- a/tools/testing/selftests/bpf/progs/test_map_lock.c +++ b/tools/testing/selftests/bpf/progs/test_map_lock.c @@ -33,7 +33,7 @@ struct { SEC("cgroup/skb") int bpf_map_lock_test(struct __sk_buff *skb) { - struct hmap_elem zero = {}, *val; + struct hmap_elem *val; int rnd = bpf_get_prandom_u32(); int key = 0, err = 1, i; struct array_elem *q; diff --git a/tools/testing/selftests/bpf/progs/test_obj_id.c 
b/tools/testing/selftests/bpf/progs/test_obj_id.c index ded71b3ff6b47..2850ae788a919 100644 --- a/tools/testing/selftests/bpf/progs/test_obj_id.c +++ b/tools/testing/selftests/bpf/progs/test_obj_id.c @@ -4,6 +4,7 @@ #include #include #include +#include "bpf_misc.h" struct { __uint(type, BPF_MAP_TYPE_ARRAY); @@ -19,6 +20,7 @@ int test_obj_id(void *ctx) __u64 *value; value = bpf_map_lookup_elem(&test_map_id, &key); + __sink(value); return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c index 79bab9b50e9e5..d9b2ba7ac340e 100644 --- a/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c +++ b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c @@ -87,7 +87,6 @@ int xdp_ingress_v6(struct xdp_md *xdp) __u8 tcp_hdr_opt_len = 0; struct tcphdr *tcp_hdr; __u64 tcp_offset = 0; - __u32 off; int err; tcp_offset = sizeof(struct ethhdr) + sizeof(struct ipv6hdr); diff --git a/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c index d3b319722e309..dc6e43bc6a62c 100644 --- a/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c +++ b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c @@ -30,7 +30,7 @@ __u32 server_id; static int parse_hdr_opt(struct bpf_dynptr *ptr, __u32 *off, __u8 *hdr_bytes_remaining, __u32 *server_id) { - __u8 *tcp_opt, kind, hdr_len; + __u8 kind, hdr_len; __u8 buffer[sizeof(kind) + sizeof(hdr_len) + sizeof(*server_id)]; __u8 *data; diff --git a/tools/testing/selftests/bpf/progs/test_pkt_access.c b/tools/testing/selftests/bpf/progs/test_pkt_access.c index 5cd7c096f62d2..bce7173152c6c 100644 --- a/tools/testing/selftests/bpf/progs/test_pkt_access.c +++ b/tools/testing/selftests/bpf/progs/test_pkt_access.c @@ -13,6 +13,7 @@ #include #include #include +#include "bpf_misc.h" /* llvm will optimize both subprograms into exactly the same BPF assembly * @@ -51,6 +52,8 @@ int get_skb_len(struct __sk_buff *skb) { volatile char buf[MAX_STACK] = {}; + __sink(buf[MAX_STACK - 1]); + return skb->len; } @@ -73,6 +76,8 @@ int get_skb_ifindex(int val, struct __sk_buff *skb, int var) { volatile char buf[MAX_STACK] = {}; + __sink(buf[MAX_STACK - 1]); + return skb->ifindex * val * var; } diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf.c b/tools/testing/selftests/bpf/progs/test_ringbuf.c index 5bdc0d38efc05..501cefa976333 100644 --- a/tools/testing/selftests/bpf/progs/test_ringbuf.c +++ b/tools/testing/selftests/bpf/progs/test_ringbuf.c @@ -41,7 +41,6 @@ int test_ringbuf(void *ctx) { int cur_pid = bpf_get_current_pid_tgid() >> 32; struct sample *sample; - int zero = 0; if (cur_pid != pid) return 0; diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c b/tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c index 2760bf60d05ad..21bb7da90ea50 100644 --- a/tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c +++ b/tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c @@ -53,6 +53,7 @@ int test_ringbuf_mem_map_key(void *ctx) /* test using 'sample' (PTR_TO_MEM | MEM_ALLOC) as map key arg */ lookup_val = (int *)bpf_map_lookup_elem(&hash_map, sample); + __sink(lookup_val); /* workaround - memcpy is necessary so that verifier doesn't * complain with: diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c index e416e0ce12b7d..9626baa6779c8 100644 --- 
a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c +++ b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c @@ -59,7 +59,6 @@ int test_ringbuf(void *ctx) int cur_pid = bpf_get_current_pid_tgid() >> 32; struct sample *sample; void *rb; - int zero = 0; if (cur_pid != pid) return 0; diff --git a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c index 7d56ed47cd4d8..5eb25c6ad75b1 100644 --- a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c +++ b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c @@ -64,7 +64,7 @@ SEC("sk_reuseport") int _select_by_skb_data(struct sk_reuseport_md *reuse_md) { __u32 linum, index = 0, flags = 0, index_zero = 0; - __u32 *result_cnt, *linum_value; + __u32 *result_cnt; struct data_check data_check = {}; struct cmd *cmd, cmd_copy; void *data, *data_end; diff --git a/tools/testing/selftests/bpf/progs/test_sk_assign.c b/tools/testing/selftests/bpf/progs/test_sk_assign.c index 21b19b758c4eb..3079244c7f96e 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_assign.c +++ b/tools/testing/selftests/bpf/progs/test_sk_assign.c @@ -15,6 +15,7 @@ #include #include #include +#include "bpf_misc.h" #if defined(IPROUTE2_HAVE_LIBBPF) /* Use a new-style map definition. */ @@ -57,7 +58,6 @@ get_tuple(struct __sk_buff *skb, bool *ipv4, bool *tcp) void *data = (void *)(long)skb->data; struct bpf_sock_tuple *result; struct ethhdr *eth; - __u64 tuple_len; __u8 proto = 0; __u64 ihl_len; @@ -94,6 +94,7 @@ get_tuple(struct __sk_buff *skb, bool *ipv4, bool *tcp) return NULL; *tcp = (proto == IPPROTO_TCP); + __sink(ihl_len); return result; } @@ -173,7 +174,6 @@ int bpf_sk_assign_test(struct __sk_buff *skb) struct bpf_sock_tuple *tuple; bool ipv4 = false; bool tcp = false; - int tuple_len; int ret = 0; tuple = get_tuple(skb, &ipv4, &tcp); diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c index 6058dcb11b36c..71f844b9b902b 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_lookup.c +++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c @@ -391,7 +391,6 @@ SEC("sk_lookup") int ctx_narrow_access(struct bpf_sk_lookup *ctx) { struct bpf_sock *sk; - int err, family; __u32 val_u32; bool v4; @@ -645,9 +644,7 @@ static __always_inline int select_server_a(struct bpf_sk_lookup *ctx) SEC("sk_lookup") int multi_prog_redir1(struct bpf_sk_lookup *ctx) { - int ret; - - ret = select_server_a(ctx); + (void)select_server_a(ctx); bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY); return SK_PASS; } @@ -655,9 +652,7 @@ int multi_prog_redir1(struct bpf_sk_lookup *ctx) SEC("sk_lookup") int multi_prog_redir2(struct bpf_sk_lookup *ctx) { - int ret; - - ret = select_server_a(ctx); + (void)select_server_a(ctx); bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY); return SK_PASS; } diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c index 6ccf6d546074e..e9efc32630225 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c +++ b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c @@ -110,7 +110,6 @@ int err_modify_sk_pointer(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; struct bpf_sock *sk; - __u32 family; sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); if (sk) { @@ -125,7 +124,6 @@ int err_modify_sk_or_null_pointer(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; struct 
bpf_sock *sk; - __u32 family; sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); sk += 1; diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c index 9f4b8f9f11811..bbad3c2d9aa57 100644 --- a/tools/testing/selftests/bpf/progs/test_sock_fields.c +++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c @@ -121,7 +121,7 @@ static void tpcpy(struct bpf_tcp_sock *dst, SEC("cgroup_skb/egress") int egress_read_sock_fields(struct __sk_buff *skb) { - struct bpf_spinlock_cnt cli_cnt_init = { .lock = 0, .cnt = 0xeB9F }; + struct bpf_spinlock_cnt cli_cnt_init = { .lock = {}, .cnt = 0xeB9F }; struct bpf_spinlock_cnt *pkt_out_cnt, *pkt_out_cnt10; struct bpf_tcp_sock *tp, *tp_ret; struct bpf_sock *sk, *sk_ret; diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h index 6c85b00f27b2e..baf9ebc6d903f 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h +++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h @@ -14,6 +14,7 @@ #include #include #include +#include "bpf_misc.h" /* Sockmap sample program connects a client and a backend together * using cgroups. @@ -111,12 +112,15 @@ int bpf_prog2(struct __sk_buff *skb) int len, *f, ret, zero = 0; __u64 flags = 0; + __sink(rport); if (lport == 10000) ret = 10; else ret = 1; len = (__u32)skb->data_end - (__u32)skb->data; + __sink(len); + f = bpf_map_lookup_elem(&sock_skb_opts, &zero); if (f && *f) { ret = 3; @@ -180,7 +184,6 @@ int bpf_prog3(struct __sk_buff *skb) if (err) return SK_DROP; bpf_write_pass(skb, 13); -tls_out: return ret; } @@ -188,8 +191,7 @@ SEC("sockops") int bpf_sockmap(struct bpf_sock_ops *skops) { __u32 lport, rport; - int op, err = 0, index, key, ret; - + int op, err, ret; op = (int) skops->op; @@ -228,6 +230,8 @@ int bpf_sockmap(struct bpf_sock_ops *skops) break; } + __sink(err); + return 0; } @@ -321,6 +325,10 @@ int bpf_prog8(struct sk_msg_md *msg) } else { return SK_DROP; } + + __sink(data_end); + __sink(data); + return SK_PASS; } SEC("sk_msg4") diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock.c b/tools/testing/selftests/bpf/progs/test_spin_lock.c index 5bd10409285b4..b2440a0ff422a 100644 --- a/tools/testing/selftests/bpf/progs/test_spin_lock.c +++ b/tools/testing/selftests/bpf/progs/test_spin_lock.c @@ -3,6 +3,7 @@ #include #include #include +#include "bpf_misc.h" struct hmap_elem { volatile int cnt; @@ -89,6 +90,8 @@ int bpf_spin_lock_test(struct __sk_buff *skb) credit = q->credit; bpf_spin_unlock(&q->lock); + __sink(credit); + /* spin_lock in cgroup local storage */ cls = bpf_get_local_storage(&cls_map, 0); bpf_spin_lock(&cls->lock); diff --git a/tools/testing/selftests/bpf/progs/test_tc_dtime.c b/tools/testing/selftests/bpf/progs/test_tc_dtime.c index 125beec31834b..74ec09f040b7a 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_dtime.c +++ b/tools/testing/selftests/bpf/progs/test_tc_dtime.c @@ -163,9 +163,9 @@ static int skb_get_type(struct __sk_buff *skb) ip6h = data + sizeof(struct ethhdr); if (ip6h + 1 > data_end) return -1; - if (v6_equal(ip6h->saddr, (struct in6_addr)ip6_src)) + if (v6_equal(ip6h->saddr, (struct in6_addr){{ip6_src}})) ns = SRC_NS; - else if (v6_equal(ip6h->saddr, (struct in6_addr)ip6_dst)) + else if (v6_equal(ip6h->saddr, (struct in6_addr){{ip6_dst}})) ns = DST_NS; inet_proto = ip6h->nexthdr; trans = ip6h + 1; diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh.c 
b/tools/testing/selftests/bpf/progs/test_tc_neigh.c index 3e32ea375ab4e..de15155f2609f 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_neigh.c +++ b/tools/testing/selftests/bpf/progs/test_tc_neigh.c @@ -94,7 +94,7 @@ int tc_dst(struct __sk_buff *skb) redirect = is_remote_ep_v4(skb, __bpf_constant_htonl(ip4_src)); break; case __bpf_constant_htons(ETH_P_IPV6): - redirect = is_remote_ep_v6(skb, (struct in6_addr)ip6_src); + redirect = is_remote_ep_v6(skb, (struct in6_addr){{ip6_src}}); break; } @@ -119,7 +119,7 @@ int tc_src(struct __sk_buff *skb) redirect = is_remote_ep_v4(skb, __bpf_constant_htonl(ip4_dst)); break; case __bpf_constant_htons(ETH_P_IPV6): - redirect = is_remote_ep_v6(skb, (struct in6_addr)ip6_dst); + redirect = is_remote_ep_v6(skb, (struct in6_addr){{ip6_dst}}); break; } diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c index 3ded052807576..cf7ed8cbb1fe7 100644 --- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c @@ -46,8 +46,6 @@ int bpf_testcb(struct bpf_sock_ops *skops) struct bpf_sock_ops *reuse = skops; struct tcphdr *thdr; int window_clamp = 9216; - int good_call_rv = 0; - int bad_call_rv = 0; int save_syn = 1; int rv = -1; int v = 0; diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index 95b4aa0928baa..9ab2d55ab7c0e 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -209,7 +209,6 @@ int erspan_get_tunnel(struct __sk_buff *skb) { struct bpf_tunnel_key key; struct erspan_metadata md; - __u32 index; int ret; ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); @@ -289,7 +288,6 @@ int ip4ip6erspan_get_tunnel(struct __sk_buff *skb) { struct bpf_tunnel_key key; struct erspan_metadata md; - __u32 index; int ret; ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), @@ -405,8 +403,6 @@ int vxlan_get_tunnel_src(struct __sk_buff *skb) int ret; struct bpf_tunnel_key key; struct vxlan_metadata md; - __u32 orig_daddr; - __u32 index = 0; ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_FLAGS); @@ -443,9 +439,7 @@ int veth_set_outer_dst(struct __sk_buff *skb) void *data_end = (void *)(long)skb->data_end; struct udphdr *udph; struct iphdr *iph; - __u32 index = 0; int ret = 0; - int shrink; __s64 csum; if ((void *)eth + sizeof(*eth) > data_end) { diff --git a/tools/testing/selftests/bpf/progs/test_usdt_multispec.c b/tools/testing/selftests/bpf/progs/test_usdt_multispec.c index aa6de32b50d1d..962f3462066a8 100644 --- a/tools/testing/selftests/bpf/progs/test_usdt_multispec.c +++ b/tools/testing/selftests/bpf/progs/test_usdt_multispec.c @@ -18,8 +18,6 @@ int usdt_100_sum; SEC("usdt//proc/self/exe:test:usdt_100") int BPF_USDT(usdt_100, int x) { - long tmp; - if (my_pid != (bpf_get_current_pid_tgid() >> 32)) return 0; diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale1.c b/tools/testing/selftests/bpf/progs/test_verif_scale1.c index ac6135d9374c4..323a73fb2e8c2 100644 --- a/tools/testing/selftests/bpf/progs/test_verif_scale1.c +++ b/tools/testing/selftests/bpf/progs/test_verif_scale1.c @@ -11,7 +11,7 @@ int balancer_ingress(struct __sk_buff *ctx) void *data_end = (void *)(long)ctx->data_end; void *data = (void *)(long)ctx->data; void *ptr; - int ret = 0, nh_off, i = 0; + int nh_off, i = 0; nh_off = 14; diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale2.c 
b/tools/testing/selftests/bpf/progs/test_verif_scale2.c index f90ffcafd1e82..f5318f7570840 100644 --- a/tools/testing/selftests/bpf/progs/test_verif_scale2.c +++ b/tools/testing/selftests/bpf/progs/test_verif_scale2.c @@ -11,7 +11,7 @@ int balancer_ingress(struct __sk_buff *ctx) void *data_end = (void *)(long)ctx->data_end; void *data = (void *)(long)ctx->data; void *ptr; - int ret = 0, nh_off, i = 0; + int nh_off, i = 0; nh_off = 14; diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale3.c b/tools/testing/selftests/bpf/progs/test_verif_scale3.c index ca33a9b711c4b..2e06dbb1ad5c8 100644 --- a/tools/testing/selftests/bpf/progs/test_verif_scale3.c +++ b/tools/testing/selftests/bpf/progs/test_verif_scale3.c @@ -11,7 +11,7 @@ int balancer_ingress(struct __sk_buff *ctx) void *data_end = (void *)(long)ctx->data_end; void *data = (void *)(long)ctx->data; void *ptr; - int ret = 0, nh_off, i = 0; + int nh_off, i = 0; nh_off = 32; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c index 297c260fc3645..81bb38d72cedd 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c @@ -5,8 +5,6 @@ SEC("xdp") int _xdp_adjust_tail_grow(struct xdp_md *xdp) { - void *data_end = (void *)(long)xdp->data_end; - void *data = (void *)(long)xdp->data; int data_len = bpf_xdp_get_buff_len(xdp); int offset = 0; /* SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) */ diff --git a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c index 3379d303f41a9..ee48c49639714 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c @@ -45,8 +45,6 @@ SEC("fentry/FUNC") int BPF_PROG(trace_on_entry, struct xdp_buff *xdp) { struct meta meta; - void *data_end = (void *)(long)xdp->data_end; - void *data = (void *)(long)xdp->data; meta.ifindex = xdp->rxq->dev->ifindex; meta.pkt_len = bpf_xdp_get_buff_len((struct xdp_md *)xdp); diff --git a/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c b/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c index 7521a805b5068..25ee4a22e48d9 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c @@ -82,7 +82,6 @@ static __always_inline int handle_ipv4(struct xdp_md *xdp, struct bpf_dynptr *xd struct iptnl_info *tnl; struct ethhdr *new_eth; struct ethhdr *old_eth; - __u32 transport_hdr_sz; struct iphdr *iph; __u16 *next_iph; __u16 payload_len; @@ -165,7 +164,6 @@ static __always_inline int handle_ipv6(struct xdp_md *xdp, struct bpf_dynptr *xd struct iptnl_info *tnl; struct ethhdr *new_eth; struct ethhdr *old_eth; - __u32 transport_hdr_sz; struct ipv6hdr *ip6h; __u16 payload_len; struct vip vip = {}; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c index ba48fcb98ab27..42c8f6ded0e4d 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c @@ -371,45 +371,6 @@ bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval, return true; } -static __attribute__ ((noinline)) -bool decap_v6(struct xdp_md *xdp, void **data, void **data_end, bool inner_v4) -{ - struct eth_hdr *new_eth; - struct eth_hdr *old_eth; - - old_eth = *data; - new_eth = *data + sizeof(struct ipv6hdr); - memcpy(new_eth->eth_source, 
old_eth->eth_source, 6); - memcpy(new_eth->eth_dest, old_eth->eth_dest, 6); - if (inner_v4) - new_eth->eth_proto = 8; - else - new_eth->eth_proto = 56710; - if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct ipv6hdr))) - return false; - *data = (void *)(long)xdp->data; - *data_end = (void *)(long)xdp->data_end; - return true; -} - -static __attribute__ ((noinline)) -bool decap_v4(struct xdp_md *xdp, void **data, void **data_end) -{ - struct eth_hdr *new_eth; - struct eth_hdr *old_eth; - - old_eth = *data; - new_eth = *data + sizeof(struct iphdr); - memcpy(new_eth->eth_source, old_eth->eth_source, 6); - memcpy(new_eth->eth_dest, old_eth->eth_dest, 6); - new_eth->eth_proto = 8; - if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr))) - return false; - *data = (void *)(long)xdp->data; - *data_end = (void *)(long)xdp->data_end; - return true; -} - static __attribute__ ((noinline)) int swap_mac_and_send(void *data, void *data_end) { @@ -430,7 +391,6 @@ int send_icmp_reply(void *data, void *data_end) __u16 *next_iph_u16; __u32 tmp_addr = 0; struct iphdr *iph; - __u32 csum1 = 0; __u32 csum = 0; __u64 off = 0; @@ -662,7 +622,6 @@ static int process_l3_headers_v4(struct packet_description *pckt, void *data_end) { struct iphdr *iph; - __u64 iph_len; int action; iph = data + off; @@ -696,7 +655,6 @@ static int process_packet(void *data, __u64 off, void *data_end, struct packet_description pckt = { }; struct vip_definition vip = { }; struct lb_stats *data_stats; - struct eth_hdr *eth = data; void *lru_map = &lru_cache; struct vip_meta *vip_info; __u32 lru_stats_key = 513; @@ -704,7 +662,6 @@ static int process_packet(void *data, __u64 off, void *data_end, __u32 stats_key = 512; struct ctl_value *cval; __u16 pkt_bytes; - __u64 iph_len; __u8 protocol; __u32 vip_num; int action; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c index 4ddcb6dfe500f..f3ec8086482d6 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c @@ -209,19 +209,6 @@ int xdp_prognum2(struct xdp_md *ctx) return XDP_PASS; } -static __always_inline -void shift_mac_4bytes_16bit(void *data) -{ - __u16 *p = data; - - p[7] = p[5]; /* delete p[7] was vlan_hdr->h_vlan_TCI */ - p[6] = p[4]; /* delete p[6] was ethhdr->h_proto */ - p[5] = p[3]; - p[4] = p[2]; - p[3] = p[1]; - p[2] = p[0]; -} - static __always_inline void shift_mac_4bytes_32bit(void *data) { diff --git a/tools/testing/selftests/bpf/progs/type_cast.c b/tools/testing/selftests/bpf/progs/type_cast.c index eb78e6f031299..a9629ac230fdd 100644 --- a/tools/testing/selftests/bpf/progs/type_cast.c +++ b/tools/testing/selftests/bpf/progs/type_cast.c @@ -63,7 +63,6 @@ SEC("?tp_btf/sys_enter") int BPF_PROG(untrusted_ptr, struct pt_regs *regs, long id) { struct task_struct *task, *task_dup; - long *ptr; task = bpf_get_current_task_btf(); task_dup = bpf_rdonly_cast(task, bpf_core_type_id_kernel(struct task_struct)); diff --git a/tools/testing/selftests/bpf/progs/udp_limit.c b/tools/testing/selftests/bpf/progs/udp_limit.c index 165e3c2dd9a3d..4767451b59ac9 100644 --- a/tools/testing/selftests/bpf/progs/udp_limit.c +++ b/tools/testing/selftests/bpf/progs/udp_limit.c @@ -17,7 +17,6 @@ SEC("cgroup/sock_create") int sock(struct bpf_sock *ctx) { int *sk_storage; - __u32 key; if (ctx->type != SOCK_DGRAM) return 1; @@ -46,7 +45,6 @@ SEC("cgroup/sock_release") int sock_release(struct bpf_sock *ctx) { int *sk_storage; - __u32 key; if (ctx->type != SOCK_DGRAM) return 1; diff 
--git a/tools/testing/selftests/bpf/progs/user_ringbuf_success.c b/tools/testing/selftests/bpf/progs/user_ringbuf_success.c index 0ade1110613b8..dd3bdf672633c 100644 --- a/tools/testing/selftests/bpf/progs/user_ringbuf_success.c +++ b/tools/testing/selftests/bpf/progs/user_ringbuf_success.c @@ -162,8 +162,6 @@ SEC("fentry/" SYS_PREFIX "sys_prctl") int test_user_ringbuf_protocol(void *ctx) { long status = 0; - struct sample *sample = NULL; - struct bpf_dynptr ptr; if (!is_test_process()) return 0; @@ -183,10 +181,6 @@ int test_user_ringbuf_protocol(void *ctx) SEC("fentry/" SYS_PREFIX "sys_getpgid") int test_user_ringbuf(void *ctx) { - int status = 0; - struct sample *sample = NULL; - struct bpf_dynptr ptr; - if (!is_test_process()) return 0; diff --git a/tools/testing/selftests/bpf/progs/xdp_features.c b/tools/testing/selftests/bpf/progs/xdp_features.c index 87c247d56f723..67424084a38a1 100644 --- a/tools/testing/selftests/bpf/progs/xdp_features.c +++ b/tools/testing/selftests/bpf/progs/xdp_features.c @@ -70,7 +70,6 @@ xdp_process_echo_packet(struct xdp_md *xdp, bool dut) struct tlv_hdr *tlv; struct udphdr *uh; __be16 port; - __u8 *cmd; if (eh + 1 > (struct ethhdr *)data_end) return -EINVAL; diff --git a/tools/testing/selftests/bpf/progs/xdping_kern.c b/tools/testing/selftests/bpf/progs/xdping_kern.c index 4ad73847b8a5d..54cf1765118b8 100644 --- a/tools/testing/selftests/bpf/progs/xdping_kern.c +++ b/tools/testing/selftests/bpf/progs/xdping_kern.c @@ -89,7 +89,6 @@ static __always_inline int icmp_check(struct xdp_md *ctx, int type) SEC("xdp") int xdping_client(struct xdp_md *ctx) { - void *data_end = (void *)(long)ctx->data_end; void *data = (void *)(long)ctx->data; struct pinginfo *pinginfo = NULL; struct ethhdr *eth = data; @@ -153,7 +152,6 @@ int xdping_client(struct xdp_md *ctx) SEC("xdp") int xdping_server(struct xdp_md *ctx) { - void *data_end = (void *)(long)ctx->data_end; void *data = (void *)(long)ctx->data; struct ethhdr *eth = data; struct icmphdr *icmph; diff --git a/tools/testing/selftests/bpf/progs/xdpwall.c b/tools/testing/selftests/bpf/progs/xdpwall.c index 7a891a0c3a39c..c2dd0c28237a2 100644 --- a/tools/testing/selftests/bpf/progs/xdpwall.c +++ b/tools/testing/selftests/bpf/progs/xdpwall.c @@ -321,7 +321,6 @@ int edgewall(struct xdp_md *ctx) void *data = (void *)(long)(ctx->data); struct fw_match_info match_info = {}; struct pkt_info info = {}; - __u8 parse_err = NO_ERR; void *transport_hdr; struct ethhdr *eth; bool filter_res; -- GitLab From 3d5a55ddc25508fe950991603d0224c0bba60558 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 8 Mar 2023 21:40:15 -0800 Subject: [PATCH 0317/2078] selftests/bpf: make BPF compiler flags stricter We recently added -Wuninitialized, but it's not enough to catch various silly mistakes or omissions. Let's go all the way to -Wall, just like we do for user-space code. 
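As an illustration (a hypothetical fragment, not code from the tree), -Wall catches exactly the kind of dead locals that the preceding cleanup patch had to remove by hand, while -Wuninitialized alone stays silent about them:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int sample_dead_locals(struct xdp_md *ctx)
{
	/* Both locals are initialized but never read: -Wuninitialized has
	 * nothing to say here, while -Wall reports -Wunused-variable for
	 * each, and -Werror then turns that into a build failure.
	 */
	void *data_end = (void *)(long)ctx->data_end;
	int nh_off = 14;

	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";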
Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230309054015.4068562-5-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 16f404aa1b237..606e2d738dd84 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -352,12 +352,12 @@ CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%)) endif CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH)) -BPF_CFLAGS = -g -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ +BPF_CFLAGS = -g -Wall -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \ -I$(abspath $(OUTPUT)/../usr/include) CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \ - -Wno-compare-distinct-pointer-types -Wuninitialized + -Wno-compare-distinct-pointer-types $(OUTPUT)/test_l4lb_noinline.o: BPF_CFLAGS += -fno-inline $(OUTPUT)/test_xdp_noinline.o: BPF_CFLAGS += -fno-inline -- GitLab From 4b5ce570dbef57a20acdd71b0c65376009012354 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 9 Mar 2023 22:01:49 -0800 Subject: [PATCH 0318/2078] bpf: ensure state checkpointing at iter_next() call sites The state equivalence check and checkpointing performed in is_state_visited() employ certain heuristics to try to save memory by avoiding state checkpoints if not enough jumps and instructions happened since the last checkpoint. This leads to unpredictability of whether a particular instruction will be checkpointed and how regularly. While normally this does not cause many problems (except inconveniences for predictable verifier tests, which we overcome with the BPF_F_TEST_STATE_FREQ flag), it turns out that this is not the case for open-coded iterators. Checking and saving state checkpoints at iter_next() call sites is crucial for fast convergence of the open-coded iterator loop logic, so we need to force it. If we don't do that, is_state_visited() might skip saving a checkpoint, causing an unnecessarily long sequence of non-checkpointed instructions and jumps, leading to exhaustion of the jump history buffer, and potentially other undesired outcomes. It is expected that with correct open-coded iterators convergence will happen quickly, so we don't run a risk of exhausting memory. In addition to the prune and jump instruction marks, this patch adds a "forced checkpoint" mark, and makes sure that any iter_next() call instruction is marked as such.
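For context, a minimal open-coded iterator loop is sketched below. This is an illustrative program only, not part of the patch: it assumes the bpf_iter_num_* kfuncs and struct bpf_iter_num from the open-coded iterators work are available, and declares them the way the selftests do. Every bpf_iter_num_next() call in it is exactly the kind of iter_next() call site that now gets force-marked as a checkpoint:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

/* Assumed kernel kfuncs; the declarations follow the selftests' convention. */
extern int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) __ksym;
extern int *bpf_iter_num_next(struct bpf_iter_num *it) __ksym;
extern void bpf_iter_num_destroy(struct bpf_iter_num *it) __ksym;

SEC("raw_tp/sys_enter")
int iter_sum_example(void *ctx)
{
	struct bpf_iter_num it;
	int *v, sum = 0;

	bpf_iter_num_new(&it, 0, 100);
	/* Each bpf_iter_num_next() call is an iter_next() site: the verifier
	 * now always saves a state checkpoint here, so looping states are
	 * compared at this instruction instead of relying on the
	 * jump/instruction-count heuristics.
	 */
	while ((v = bpf_iter_num_next(&it)))
		sum += *v;
	bpf_iter_num_destroy(&it);

	return sum;
}

char _license[] SEC("license") = "GPL";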
Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230310060149.625887-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 6 +++++- kernel/bpf/verifier.c | 31 ++++++++++++++++++++++++++++--- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 0c052bc799401..81d525d057c7b 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -477,8 +477,12 @@ struct bpf_insn_aux_data { /* below fields are initialized once */ unsigned int orig_idx; /* original instruction index */ - bool prune_point; bool jmp_point; + bool prune_point; + /* ensure we check state equivalence and save state checkpoint and + * this instruction, regardless of any heuristics + */ + bool force_checkpoint; }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 45a0822844648..13fd4c893f3b0 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -13865,6 +13865,17 @@ static bool is_prune_point(struct bpf_verifier_env *env, int insn_idx) return env->insn_aux_data[insn_idx].prune_point; } +static void mark_force_checkpoint(struct bpf_verifier_env *env, int idx) +{ + env->insn_aux_data[idx].force_checkpoint = true; +} + +static bool is_force_checkpoint(struct bpf_verifier_env *env, int insn_idx) +{ + return env->insn_aux_data[insn_idx].force_checkpoint; +} + + enum { DONE_EXPLORING = 0, KEEP_EXPLORING = 1, @@ -13984,8 +13995,21 @@ static int visit_insn(int t, struct bpf_verifier_env *env) struct bpf_kfunc_call_arg_meta meta; ret = fetch_kfunc_meta(env, insn, &meta, NULL); - if (ret == 0 && is_iter_next_kfunc(&meta)) + if (ret == 0 && is_iter_next_kfunc(&meta)) { mark_prune_point(env, t); + /* Checking and saving state checkpoints at iter_next() call + * is crucial for fast convergence of open-coded iterator loop + * logic, so we need to force it. If we don't do that, + * is_state_visited() might skip saving a checkpoint, causing + * unnecessarily long sequence of not checkpointed + * instructions and jumps, leading to exhaustion of jump + * history buffer, and potentially other undesired outcomes. + * It is expected that with correct open-coded iterators + * convergence will happen quickly, so we don't run a risk of + * exhausting memory. + */ + mark_force_checkpoint(env, t); + } } return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL); @@ -15172,7 +15196,8 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) struct bpf_verifier_state_list *sl, **pprev; struct bpf_verifier_state *cur = env->cur_state, *new; int i, j, err, states_cnt = 0; - bool add_new_state = env->test_state_freq ? true : false; + bool force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx); + bool add_new_state = force_new_state; /* bpf progs typically have pruning point every 4 instructions * http://vger.kernel.org/bpfconf2019.html#session-1 @@ -15269,7 +15294,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) * at the end of the loop are likely to be useful in pruning. 
*/ skip_inf_loop_check: - if (!env->test_state_freq && + if (!force_new_state && env->jmps_processed - env->prev_jmps_processed < 20 && env->insn_processed - env->prev_insn_processed < 100) add_new_state = false; -- GitLab From 52c2b005a3c18c565fc70cfd0ca49375f301e952 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 9 Mar 2023 14:41:31 -0800 Subject: [PATCH 0319/2078] bpf: take into account liveness when propagating precision When doing state comparison, if the old state has a register that is not marked as REG_LIVE_READ, then we just skip the comparison, regardless of the state of the corresponding register in the current state. This is because a register that is not REG_LIVE_READ is irrelevant for further program execution and correctness. All good here. But when we get to precision propagation, after two states were declared equivalent, we don't take into account the old register's liveness, and thus attempt to propagate precision for a register in the current state even if that register in the old state was not REG_LIVE_READ anymore. This is bad, because the register in the current state could be anything at all and this could cause -EFAULT due to internal logic bugs. Fix this by taking the REG_LIVE_READ liveness mark into account, keeping precision propagation in sync with the logic in state comparison. Fixes: a3ce685dd01a ("bpf: fix precision tracking") Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230309224131.57449-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 13fd4c893f3b0..0aaf7703326b0 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -15058,7 +15058,8 @@ static int propagate_precision(struct bpf_verifier_env *env, state_reg = state->regs; for (i = 0; i < BPF_REG_FP; i++, state_reg++) { if (state_reg->type != SCALAR_VALUE || - !state_reg->precise) + !state_reg->precise || + !(state_reg->live & REG_LIVE_READ)) continue; if (env->log.level & BPF_LOG_LEVEL2) verbose(env, "frame %d: propagating r%d\n", i, fr); @@ -15072,7 +15073,8 @@ static int propagate_precision(struct bpf_verifier_env *env, continue; state_reg = &state->stack[i].spilled_ptr; if (state_reg->type != SCALAR_VALUE || - !state_reg->precise) + !state_reg->precise || + !(state_reg->live & REG_LIVE_READ)) continue; if (env->log.level & BPF_LOG_LEVEL2) verbose(env, "frame %d: propagating fp%d\n", -- GitLab From 4a54de65964d37c3929379271ab31355e93ccddf Mon Sep 17 00:00:00 2001 From: David Vernet Date: Fri, 10 Mar 2023 00:19:09 -0600 Subject: [PATCH 0320/2078] bpf/selftests: Fix send_signal tracepoint tests The send_signal tracepoint tests are non-deterministically failing in CI. The test works as follows: 1. Two pairs of file descriptors are created using the pipe() function. One pair is used to communicate between a parent process -> child process, and the other for the reverse direction. 2. A child is fork()'ed. The child process registers a signal handler, notifies its parent that the signal handler is registered, and then waits for its parent to have enabled a BPF program that sends a signal. 3. The parent opens and loads a BPF skeleton with programs that send signals to the child process. The different programs are triggered by different perf events (either NMI or normal perf), or by regular tracepoints. The signal is delivered to the child whenever the child triggers the program. 4.
The child's signal handler is invoked, which sets a flag saying that the signal handler was reached. The child then signals to the parent that it received the signal, and the test ends. The perf testcases (send_signal_perf{_thread} and send_signal_nmi{_thread}) work 100% of the time, but the tracepoint testcases fail non-deterministically because the tracepoint is not always being fired for the child. There are two tracepoint programs registered in the test: 'tracepoint/sched/sched_switch', and 'tracepoint/syscalls/sys_enter_nanosleep'. The child never intentionally blocks, nor sleeps, so neither tracepoint is guaranteed to be triggered. To fix this, we can have the child trigger the nanosleep program with a usleep(). Before this patch, the test would fail locally every 2-3 runs. Now, it doesn't fail after more than 1000 runs. Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20230310061909.1420887-1-void@manifault.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/send_signal.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c index d63a20fbed339..b15b343ebb6b1 100644 --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c @@ -64,8 +64,12 @@ static void test_send_signal_common(struct perf_event_attr *attr, ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read"); /* wait a little for signal handler */ - for (int i = 0; i < 1000000000 && !sigusr1_received; i++) + for (int i = 0; i < 1000000000 && !sigusr1_received; i++) { j /= i + j + 1; + if (!attr) + /* trigger the nanosleep tracepoint program. */ + usleep(1); + } buf[0] = sigusr1_received ? '2' : '0'; ASSERT_EQ(sigusr1_received, 1, "sigusr1_received"); -- GitLab From 4cbd23cc92c49173e402753cab62b8a7754ed18f Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 7 Mar 2023 22:59:20 -0800 Subject: [PATCH 0321/2078] bpf: Move a few bpf_local_storage functions to static scope This patch moves the bpf_local_storage_free_rcu() and bpf_selem_unlink_map() to static because they are not used outside of bpf_local_storage.c. 
Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20230308065936.1550103-2-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf_local_storage.h | 3 --- kernel/bpf/bpf_local_storage.c | 4 ++-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index d934248b8e813..502ad7093f13f 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -147,8 +147,6 @@ void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu); void bpf_selem_link_map(struct bpf_local_storage_map *smap, struct bpf_local_storage_elem *selem); -void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem); - struct bpf_local_storage_elem * bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value, bool charge_mem, gfp_t gfp_flags); @@ -163,7 +161,6 @@ struct bpf_local_storage_data * bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, void *value, u64 map_flags, gfp_t gfp_flags); -void bpf_local_storage_free_rcu(struct rcu_head *rcu); u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map); #endif /* _BPF_LOCAL_STORAGE_H */ diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index d3ba3f2db6405..1904a4245ebe0 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -95,7 +95,7 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, return NULL; } -void bpf_local_storage_free_rcu(struct rcu_head *rcu) +static void bpf_local_storage_free_rcu(struct rcu_head *rcu) { struct bpf_local_storage *local_storage; @@ -251,7 +251,7 @@ void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, hlist_add_head_rcu(&selem->snode, &local_storage->list); } -void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem) +static void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem) { struct bpf_local_storage_map *smap; struct bpf_local_storage_map_bucket *b; -- GitLab From 2ffcb6fc50174d1efc8f98633eb2647d84483c68 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 7 Mar 2023 22:59:21 -0800 Subject: [PATCH 0322/2078] bpf: Refactor codes into bpf_local_storage_destroy This patch first renames bpf_local_storage_unlink_nolock to bpf_local_storage_destroy(). The new name better reflects that it is only used when the storage's owner (sk/task/cgrp/inode) is being kfree()'d. All of bpf_local_storage_destroy()'s callers take the spin lock and then free the storage. This patch also moves these two steps into bpf_local_storage_destroy(). This is preparation work for a later patch that uses bpf_mem_cache_alloc/free in the bpf_local_storage.
Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20230308065936.1550103-3-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf_local_storage.h | 2 +- kernel/bpf/bpf_cgrp_storage.c | 9 +-------- kernel/bpf/bpf_inode_storage.c | 8 +------- kernel/bpf/bpf_local_storage.c | 8 ++++++-- kernel/bpf/bpf_task_storage.c | 9 +-------- net/core/bpf_sk_storage.c | 8 +------- 6 files changed, 11 insertions(+), 33 deletions(-) diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 502ad7093f13f..5908a954ddc21 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -128,7 +128,7 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage, struct bpf_local_storage_map *smap, bool cacheit_lockit); -bool bpf_local_storage_unlink_nolock(struct bpf_local_storage *local_storage); +void bpf_local_storage_destroy(struct bpf_local_storage *local_storage); void bpf_local_storage_map_free(struct bpf_map *map, struct bpf_local_storage_cache *cache, diff --git a/kernel/bpf/bpf_cgrp_storage.c b/kernel/bpf/bpf_cgrp_storage.c index 9ae07aedaf233..492594d69a86e 100644 --- a/kernel/bpf/bpf_cgrp_storage.c +++ b/kernel/bpf/bpf_cgrp_storage.c @@ -46,8 +46,6 @@ static struct bpf_local_storage __rcu **cgroup_storage_ptr(void *owner) void bpf_cgrp_storage_free(struct cgroup *cgroup) { struct bpf_local_storage *local_storage; - bool free_cgroup_storage = false; - unsigned long flags; rcu_read_lock(); local_storage = rcu_dereference(cgroup->bpf_cgrp_storage); @@ -57,14 +55,9 @@ void bpf_cgrp_storage_free(struct cgroup *cgroup) } bpf_cgrp_storage_lock(); - raw_spin_lock_irqsave(&local_storage->lock, flags); - free_cgroup_storage = bpf_local_storage_unlink_nolock(local_storage); - raw_spin_unlock_irqrestore(&local_storage->lock, flags); + bpf_local_storage_destroy(local_storage); bpf_cgrp_storage_unlock(); rcu_read_unlock(); - - if (free_cgroup_storage) - kfree_rcu(local_storage, rcu); } static struct bpf_local_storage_data * diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c index 43e2619c8167d..2d25bcfa371b3 100644 --- a/kernel/bpf/bpf_inode_storage.c +++ b/kernel/bpf/bpf_inode_storage.c @@ -57,7 +57,6 @@ static struct bpf_local_storage_data *inode_storage_lookup(struct inode *inode, void bpf_inode_storage_free(struct inode *inode) { struct bpf_local_storage *local_storage; - bool free_inode_storage = false; struct bpf_storage_blob *bsb; bsb = bpf_inode(inode); @@ -72,13 +71,8 @@ void bpf_inode_storage_free(struct inode *inode) return; } - raw_spin_lock_bh(&local_storage->lock); - free_inode_storage = bpf_local_storage_unlink_nolock(local_storage); - raw_spin_unlock_bh(&local_storage->lock); + bpf_local_storage_destroy(local_storage); rcu_read_unlock(); - - if (free_inode_storage) - kfree_rcu(local_storage, rcu); } static void *bpf_fd_inode_storage_lookup_elem(struct bpf_map *map, void *key) diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 1904a4245ebe0..e19f9f50a60df 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -652,11 +652,12 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map, return 0; } -bool bpf_local_storage_unlink_nolock(struct bpf_local_storage *local_storage) +void bpf_local_storage_destroy(struct bpf_local_storage *local_storage) { struct bpf_local_storage_elem *selem; bool free_storage = false; struct hlist_node *n; + unsigned long flags; /* Neither the bpf_prog nor the bpf_map's syscall * could 
be modifying the local_storage->list now. @@ -667,6 +668,7 @@ bool bpf_local_storage_unlink_nolock(struct bpf_local_storage *local_storage) * when unlinking elem from the local_storage->list and * the map's bucket->list. */ + raw_spin_lock_irqsave(&local_storage->lock, flags); hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) { /* Always unlink from map before unlinking from * local_storage. @@ -681,8 +683,10 @@ bool bpf_local_storage_unlink_nolock(struct bpf_local_storage *local_storage) free_storage = bpf_selem_unlink_storage_nolock( local_storage, selem, false, false); } + raw_spin_unlock_irqrestore(&local_storage->lock, flags); - return free_storage; + if (free_storage) + kfree_rcu(local_storage, rcu); } u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map) diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c index 20f942229f3c9..4dcef28744d12 100644 --- a/kernel/bpf/bpf_task_storage.c +++ b/kernel/bpf/bpf_task_storage.c @@ -72,8 +72,6 @@ task_storage_lookup(struct task_struct *task, struct bpf_map *map, void bpf_task_storage_free(struct task_struct *task) { struct bpf_local_storage *local_storage; - bool free_task_storage = false; - unsigned long flags; rcu_read_lock(); @@ -84,14 +82,9 @@ void bpf_task_storage_free(struct task_struct *task) } bpf_task_storage_lock(); - raw_spin_lock_irqsave(&local_storage->lock, flags); - free_task_storage = bpf_local_storage_unlink_nolock(local_storage); - raw_spin_unlock_irqrestore(&local_storage->lock, flags); + bpf_local_storage_destroy(local_storage); bpf_task_storage_unlock(); rcu_read_unlock(); - - if (free_task_storage) - kfree_rcu(local_storage, rcu); } static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key) diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 7a36353dbc22c..8f56438c104b2 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -49,7 +49,6 @@ static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map) void bpf_sk_storage_free(struct sock *sk) { struct bpf_local_storage *sk_storage; - bool free_sk_storage = false; rcu_read_lock(); sk_storage = rcu_dereference(sk->sk_bpf_storage); @@ -58,13 +57,8 @@ void bpf_sk_storage_free(struct sock *sk) return; } - raw_spin_lock_bh(&sk_storage->lock); - free_sk_storage = bpf_local_storage_unlink_nolock(sk_storage); - raw_spin_unlock_bh(&sk_storage->lock); + bpf_local_storage_destroy(sk_storage); rcu_read_unlock(); - - if (free_sk_storage) - kfree_rcu(sk_storage, rcu); } static void bpf_sk_storage_map_free(struct bpf_map *map) -- GitLab From 62827d612ae525695799b3635a087cb49c55e977 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 7 Mar 2023 22:59:22 -0800 Subject: [PATCH 0323/2078] bpf: Remove __bpf_local_storage_map_alloc bpf_local_storage_map_alloc() is the only caller of __bpf_local_storage_map_alloc(). The remaining logic in bpf_local_storage_map_alloc() is only a one liner setting the smap->cache_idx. Remove __bpf_local_storage_map_alloc() to simplify code. 
Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20230308065936.1550103-4-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_local_storage.c | 63 ++++++++++++++-------------------- 1 file changed, 26 insertions(+), 37 deletions(-) diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index e19f9f50a60df..f7234a8d49592 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -601,40 +601,6 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr) return 0; } -static struct bpf_local_storage_map *__bpf_local_storage_map_alloc(union bpf_attr *attr) -{ - struct bpf_local_storage_map *smap; - unsigned int i; - u32 nbuckets; - - smap = bpf_map_area_alloc(sizeof(*smap), NUMA_NO_NODE); - if (!smap) - return ERR_PTR(-ENOMEM); - bpf_map_init_from_attr(&smap->map, attr); - - nbuckets = roundup_pow_of_two(num_possible_cpus()); - /* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */ - nbuckets = max_t(u32, 2, nbuckets); - smap->bucket_log = ilog2(nbuckets); - - smap->buckets = bpf_map_kvcalloc(&smap->map, sizeof(*smap->buckets), - nbuckets, GFP_USER | __GFP_NOWARN); - if (!smap->buckets) { - bpf_map_area_free(smap); - return ERR_PTR(-ENOMEM); - } - - for (i = 0; i < nbuckets; i++) { - INIT_HLIST_HEAD(&smap->buckets[i].list); - raw_spin_lock_init(&smap->buckets[i].lock); - } - - smap->elem_size = offsetof(struct bpf_local_storage_elem, - sdata.data[attr->value_size]); - - return smap; -} - int bpf_local_storage_map_check_btf(const struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, @@ -704,10 +670,33 @@ bpf_local_storage_map_alloc(union bpf_attr *attr, struct bpf_local_storage_cache *cache) { struct bpf_local_storage_map *smap; + unsigned int i; + u32 nbuckets; + + smap = bpf_map_area_alloc(sizeof(*smap), NUMA_NO_NODE); + if (!smap) + return ERR_PTR(-ENOMEM); + bpf_map_init_from_attr(&smap->map, attr); + + nbuckets = roundup_pow_of_two(num_possible_cpus()); + /* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */ + nbuckets = max_t(u32, 2, nbuckets); + smap->bucket_log = ilog2(nbuckets); - smap = __bpf_local_storage_map_alloc(attr); - if (IS_ERR(smap)) - return ERR_CAST(smap); + smap->buckets = bpf_map_kvcalloc(&smap->map, sizeof(*smap->buckets), + nbuckets, GFP_USER | __GFP_NOWARN); + if (!smap->buckets) { + bpf_map_area_free(smap); + return ERR_PTR(-ENOMEM); + } + + for (i = 0; i < nbuckets; i++) { + INIT_HLIST_HEAD(&smap->buckets[i].list); + raw_spin_lock_init(&smap->buckets[i].lock); + } + + smap->elem_size = offsetof(struct bpf_local_storage_elem, + sdata.data[attr->value_size]); smap->cache_idx = bpf_local_storage_cache_idx_get(cache); return &smap->map; -- GitLab From 121f31f3e00dfc1acbca43f6f35779e050b56cfc Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 7 Mar 2023 22:59:23 -0800 Subject: [PATCH 0324/2078] bpf: Remove the preceding __ from __bpf_selem_unlink_storage __bpf_selem_unlink_storage is taking the spin lock and there is no name collision also. Having the preceding '__' is confusing when reviewing the later patch. 
Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20230308065936.1550103-5-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_local_storage.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index f7234a8d49592..70df8dcb20666 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -216,8 +216,8 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor return free_local_storage; } -static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem, - bool use_trace_rcu) +static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem, + bool use_trace_rcu) { struct bpf_local_storage *local_storage; bool free_local_storage = false; @@ -288,7 +288,7 @@ void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu) * the local_storage. */ bpf_selem_unlink_map(selem); - __bpf_selem_unlink_storage(selem, use_trace_rcu); + bpf_selem_unlink_storage(selem, use_trace_rcu); } /* If cacheit_lockit is false, this lookup function is lockless */ -- GitLab From fc6652aab6ad545de70b772550da9043d0b47f1c Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 7 Mar 2023 22:59:24 -0800 Subject: [PATCH 0325/2078] bpf: Remember smap in bpf_local_storage This patch remembers which smap triggers the allocation of a 'struct bpf_local_storage' object. The local_storage is allocated when the very first selem is added to the owner. The smap pointer is needed when using bpf_mem_cache_free in a later patch because it needs to free to the correct smap's bpf_mem_alloc object. When a selem is being removed, it needs to check if it is the selem that triggered the creation of the local_storage. If it is, the local_storage->smap pointer will be reset to NULL. This NULL reset is done under the local_storage->lock in bpf_selem_unlink_storage_nolock() when a selem is being removed. Also note that the local_storage may not go away even when local_storage->smap is NULL, because there may be other selems still stored in the local_storage. Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20230308065936.1550103-6-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf_local_storage.h | 1 + kernel/bpf/bpf_local_storage.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 5908a954ddc21..613b1805ed9f5 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -83,6 +83,7 @@ struct bpf_local_storage_elem { struct bpf_local_storage { struct bpf_local_storage_data __rcu *cache[BPF_LOCAL_STORAGE_CACHE_SIZE]; + struct bpf_local_storage_map __rcu *smap; struct hlist_head list; /* List of bpf_local_storage_elem */ void *owner; /* The object that owns the above "list" of * bpf_local_storage_elem.
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 70df8dcb20666..5585dbfd9c66f 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -213,6 +213,9 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor kfree_rcu(selem, rcu); } + if (rcu_access_pointer(local_storage->smap) == smap) + RCU_INIT_POINTER(local_storage->smap, NULL); + return free_local_storage; } @@ -368,6 +371,7 @@ int bpf_local_storage_alloc(void *owner, goto uncharge; } + RCU_INIT_POINTER(storage->smap, smap); INIT_HLIST_HEAD(&storage->list); raw_spin_lock_init(&storage->lock); storage->owner = owner; -- GitLab From a47eabf216f77cb6f22ceb38d46f1bb95968579c Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 7 Mar 2023 22:59:25 -0800 Subject: [PATCH 0326/2078] bpf: Repurpose use_trace_rcu to reuse_now in bpf_local_storage This patch re-purposes use_trace_rcu to mean whether the freed memory can be reused immediately. use_trace_rcu is renamed to reuse_now. Other than the boolean test being reversed, it should be a no-op. The following explains the reason for the rename and how it will be used in a later patch. In a later patch, bpf_mem_cache_alloc/free will be used in the bpf_local_storage. The bpf mem allocator will reuse the freed memory immediately. Some of the free paths in bpf_local_storage do not support the memory being reused immediately. These paths are the "delete" elem cases from the bpf_*_storage_delete() helper and the map_delete_elem() syscall. Note that a "delete" elem before the owner's (sk/task/cgrp/inode) lifetime has ended is not the common usage for the local storage. The common free path, bpf_local_storage_destroy(), can reuse the memory immediately. This common path means the storage stays with its owner until the owner is destroyed. The above-mentioned "delete" elem paths that cannot reuse memory immediately always have 'use_trace_rcu == true'. The cases that are safe for immediate reuse always have 'use_trace_rcu == false'. Instead of adding another arg in a later patch, this patch re-purposes this arg to reuse_now and reverses the test logic. In a later patch, 'reuse_now == true' will free to the bpf_mem_cache_free() where the memory can be reused immediately. 'reuse_now == false' will go through the call_rcu_tasks_trace().
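A condensed sketch of where this flag is headed is shown below. It mirrors the bpf_selem_free() helper introduced a few patches later in this series and is included only to illustrate the intended dispatch; the bpf_mem_cache_free() conversion on the reuse_now path comes in a later patch still:

/* Illustrative sketch, not new code in this patch.
 * reuse_now == false: a sleepable BPF program may still be reading the selem,
 * so defer the free with call_rcu_tasks_trace().
 * reuse_now == true: nothing can reach the selem anymore (e.g. the owner is
 * being destroyed), so a regular RCU grace period is enough, and a later
 * patch can hand the memory straight back to bpf_mem_cache_free() for reuse.
 */
static void selem_free_sketch(struct bpf_local_storage_elem *selem,
			      struct bpf_local_storage_map *smap,
			      bool reuse_now)
{
	bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
	if (!reuse_now)
		call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_trace_rcu);
	else
		call_rcu(&selem->rcu, bpf_selem_free_rcu);
}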
Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20230308065936.1550103-7-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf_local_storage.h | 2 +- kernel/bpf/bpf_cgrp_storage.c | 2 +- kernel/bpf/bpf_inode_storage.c | 2 +- kernel/bpf/bpf_local_storage.c | 24 ++++++++++++------------ kernel/bpf/bpf_task_storage.c | 2 +- net/core/bpf_sk_storage.c | 2 +- 6 files changed, 17 insertions(+), 17 deletions(-) diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 613b1805ed9f5..18a31add22555 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -143,7 +143,7 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map, void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, struct bpf_local_storage_elem *selem); -void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu); +void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now); void bpf_selem_link_map(struct bpf_local_storage_map *smap, struct bpf_local_storage_elem *selem); diff --git a/kernel/bpf/bpf_cgrp_storage.c b/kernel/bpf/bpf_cgrp_storage.c index 492594d69a86e..c975cacdd16b4 100644 --- a/kernel/bpf/bpf_cgrp_storage.c +++ b/kernel/bpf/bpf_cgrp_storage.c @@ -121,7 +121,7 @@ static int cgroup_storage_delete(struct cgroup *cgroup, struct bpf_map *map) if (!sdata) return -ENOENT; - bpf_selem_unlink(SELEM(sdata), true); + bpf_selem_unlink(SELEM(sdata), false); return 0; } diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c index 2d25bcfa371b3..ad2ab0187e45c 100644 --- a/kernel/bpf/bpf_inode_storage.c +++ b/kernel/bpf/bpf_inode_storage.c @@ -122,7 +122,7 @@ static int inode_storage_delete(struct inode *inode, struct bpf_map *map) if (!sdata) return -ENOENT; - bpf_selem_unlink(SELEM(sdata), true); + bpf_selem_unlink(SELEM(sdata), false); return 0; } diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 5585dbfd9c66f..70c34a948c3ce 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -147,7 +147,7 @@ static void bpf_selem_free_trace_rcu(struct rcu_head *rcu) */ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage, struct bpf_local_storage_elem *selem, - bool uncharge_mem, bool use_trace_rcu) + bool uncharge_mem, bool reuse_now) { struct bpf_local_storage_map *smap; bool free_local_storage; @@ -201,7 +201,7 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor * any special fields. 
*/ rec = smap->map.record; - if (use_trace_rcu) { + if (!reuse_now) { if (!IS_ERR_OR_NULL(rec)) call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_fields_trace_rcu); else @@ -220,7 +220,7 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor } static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem, - bool use_trace_rcu) + bool reuse_now) { struct bpf_local_storage *local_storage; bool free_local_storage = false; @@ -235,11 +235,11 @@ static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem, raw_spin_lock_irqsave(&local_storage->lock, flags); if (likely(selem_linked_to_storage(selem))) free_local_storage = bpf_selem_unlink_storage_nolock( - local_storage, selem, true, use_trace_rcu); + local_storage, selem, true, reuse_now); raw_spin_unlock_irqrestore(&local_storage->lock, flags); if (free_local_storage) { - if (use_trace_rcu) + if (!reuse_now) call_rcu_tasks_trace(&local_storage->rcu, bpf_local_storage_free_rcu); else @@ -284,14 +284,14 @@ void bpf_selem_link_map(struct bpf_local_storage_map *smap, raw_spin_unlock_irqrestore(&b->lock, flags); } -void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu) +void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now) { /* Always unlink from map before unlinking from local_storage * because selem will be freed after successfully unlinked from * the local_storage. */ bpf_selem_unlink_map(selem); - bpf_selem_unlink_storage(selem, use_trace_rcu); + bpf_selem_unlink_storage(selem, reuse_now); } /* If cacheit_lockit is false, this lookup function is lockless */ @@ -538,7 +538,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, if (old_sdata) { bpf_selem_unlink_map(SELEM(old_sdata)); bpf_selem_unlink_storage_nolock(local_storage, SELEM(old_sdata), - false, true); + false, false); } unlock: @@ -651,7 +651,7 @@ void bpf_local_storage_destroy(struct bpf_local_storage *local_storage) * of the loop will set the free_cgroup_storage to true. */ free_storage = bpf_selem_unlink_storage_nolock( - local_storage, selem, false, false); + local_storage, selem, false, true); } raw_spin_unlock_irqrestore(&local_storage->lock, flags); @@ -745,7 +745,7 @@ void bpf_local_storage_map_free(struct bpf_map *map, migrate_disable(); this_cpu_inc(*busy_counter); } - bpf_selem_unlink(selem, false); + bpf_selem_unlink(selem, true); if (busy_counter) { this_cpu_dec(*busy_counter); migrate_enable(); @@ -783,8 +783,8 @@ void bpf_local_storage_map_free(struct bpf_map *map, /* We cannot skip rcu_barrier() when rcu_trace_implies_rcu_gp() * is true, because while call_rcu invocation is skipped in that * case in bpf_selem_free_fields_trace_rcu (and all local - * storage maps pass use_trace_rcu = true), there can be - * call_rcu callbacks based on use_trace_rcu = false in the + * storage maps pass reuse_now = false), there can be + * call_rcu callbacks based on reuse_now = true in the * while ((selem = ...)) loop above or when owner's free path * calls bpf_local_storage_unlink_nolock. 
*/ diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c index 4dcef28744d12..c88cc04c17c1f 100644 --- a/kernel/bpf/bpf_task_storage.c +++ b/kernel/bpf/bpf_task_storage.c @@ -168,7 +168,7 @@ static int task_storage_delete(struct task_struct *task, struct bpf_map *map, if (!nobusy) return -EBUSY; - bpf_selem_unlink(SELEM(sdata), true); + bpf_selem_unlink(SELEM(sdata), false); return 0; } diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 8f56438c104b2..a5f185b8e50ad 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -40,7 +40,7 @@ static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map) if (!sdata) return -ENOENT; - bpf_selem_unlink(SELEM(sdata), true); + bpf_selem_unlink(SELEM(sdata), false); return 0; } -- GitLab From c609981342dca634e5dea8c6ca175b6533581261 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 7 Mar 2023 22:59:26 -0800 Subject: [PATCH 0327/2078] bpf: Remove bpf_selem_free_fields*_rcu This patch removes the bpf_selem_free_fields*_rcu callbacks. The bpf_obj_free_fields() can be done before the call_rcu_tasks_trace() and kfree_rcu(). This is needed when a later patch uses bpf_mem_cache_alloc/free. In bpf hashtab, bpf_obj_free_fields() is also called before calling bpf_mem_cache_free. The discussion can be found in https://lore.kernel.org/bpf/f67021ee-21d9-bfae-6134-4ca542fab843@linux.dev/ Acked-by: Kumar Kartikeya Dwivedi Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20230308065936.1550103-8-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_local_storage.c | 67 +++------------------------------- 1 file changed, 5 insertions(+), 62 deletions(-) diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 70c34a948c3ce..715deaaefe13b 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -109,27 +109,6 @@ static void bpf_local_storage_free_rcu(struct rcu_head *rcu) kfree_rcu(local_storage, rcu); } -static void bpf_selem_free_fields_rcu(struct rcu_head *rcu) -{ - struct bpf_local_storage_elem *selem; - struct bpf_local_storage_map *smap; - - selem = container_of(rcu, struct bpf_local_storage_elem, rcu); - /* protected by the rcu_barrier*() */ - smap = rcu_dereference_protected(SDATA(selem)->smap, true); - bpf_obj_free_fields(smap->map.record, SDATA(selem)->data); - kfree(selem); -} - -static void bpf_selem_free_fields_trace_rcu(struct rcu_head *rcu) -{ - /* Free directly if Tasks Trace RCU GP also implies RCU GP */ - if (rcu_trace_implies_rcu_gp()) - bpf_selem_free_fields_rcu(rcu); - else - call_rcu(rcu, bpf_selem_free_fields_rcu); -} - static void bpf_selem_free_trace_rcu(struct rcu_head *rcu) { struct bpf_local_storage_elem *selem; @@ -151,7 +130,6 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor { struct bpf_local_storage_map *smap; bool free_local_storage; - struct btf_record *rec; void *owner; smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held()); @@ -192,26 +170,11 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor SDATA(selem)) RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL); - /* A different RCU callback is chosen whenever we need to free - * additional fields in selem data before freeing selem.
- * bpf_local_storage_map_free only executes rcu_barrier to wait for RCU - * callbacks when it has special fields, hence we can only conditionally - * dereference smap, as by this time the map might have already been - * freed without waiting for our call_rcu callback if it did not have - * any special fields. - */ - rec = smap->map.record; - if (!reuse_now) { - if (!IS_ERR_OR_NULL(rec)) - call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_fields_trace_rcu); - else - call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_trace_rcu); - } else { - if (!IS_ERR_OR_NULL(rec)) - call_rcu(&selem->rcu, bpf_selem_free_fields_rcu); - else - kfree_rcu(selem, rcu); - } + bpf_obj_free_fields(smap->map.record, SDATA(selem)->data); + if (!reuse_now) + call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_trace_rcu); + else + kfree_rcu(selem, rcu); if (rcu_access_pointer(local_storage->smap) == smap) RCU_INIT_POINTER(local_storage->smap, NULL); @@ -769,26 +732,6 @@ void bpf_local_storage_map_free(struct bpf_map *map, */ synchronize_rcu(); - /* Only delay freeing of smap, buckets are not needed anymore */ kvfree(smap->buckets); - - /* When local storage has special fields, callbacks for - * bpf_selem_free_fields_rcu and bpf_selem_free_fields_trace_rcu will - * keep using the map BTF record, we need to execute an RCU barrier to - * wait for them as the record will be freed right after our map_free - * callback. - */ - if (!IS_ERR_OR_NULL(smap->map.record)) { - rcu_barrier_tasks_trace(); - /* We cannot skip rcu_barrier() when rcu_trace_implies_rcu_gp() - * is true, because while call_rcu invocation is skipped in that - * case in bpf_selem_free_fields_trace_rcu (and all local - * storage maps pass reuse_now = false), there can be - * call_rcu callbacks based on reuse_now = true in the - * while ((selem = ...)) loop above or when owner's free path - * calls bpf_local_storage_unlink_nolock. - */ - rcu_barrier(); - } bpf_map_area_free(smap); } -- GitLab From f8ccf30c179ec1ac16654f6e6ceb40cce1530b91 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 7 Mar 2023 22:59:27 -0800 Subject: [PATCH 0328/2078] bpf: Add bpf_selem_free_rcu callback Add bpf_selem_free_rcu() callback to do the kfree() instead of using kfree_rcu. It is a preparation work for using bpf_mem_cache_alloc/free in a later patch. Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20230308065936.1550103-9-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_local_storage.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 715deaaefe13b..146e9caeda963 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -109,15 +109,20 @@ static void bpf_local_storage_free_rcu(struct rcu_head *rcu) kfree_rcu(local_storage, rcu); } -static void bpf_selem_free_trace_rcu(struct rcu_head *rcu) +static void bpf_selem_free_rcu(struct rcu_head *rcu) { struct bpf_local_storage_elem *selem; selem = container_of(rcu, struct bpf_local_storage_elem, rcu); + kfree(selem); +} + +static void bpf_selem_free_trace_rcu(struct rcu_head *rcu) +{ if (rcu_trace_implies_rcu_gp()) - kfree(selem); + bpf_selem_free_rcu(rcu); else - kfree_rcu(selem, rcu); + call_rcu(rcu, bpf_selem_free_rcu); } /* local_storage->lock must be held and selem->local_storage == local_storage. 
@@ -174,7 +179,7 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor if (!reuse_now) call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_trace_rcu); else - kfree_rcu(selem, rcu); + call_rcu(&selem->rcu, bpf_selem_free_rcu); if (rcu_access_pointer(local_storage->smap) == smap) RCU_INIT_POINTER(local_storage->smap, NULL); -- GitLab From c0d63f309186d8492577c67c67984c714b6b72bc Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 7 Mar 2023 22:59:28 -0800 Subject: [PATCH 0329/2078] bpf: Add bpf_selem_free() This patch refactors the selem freeing logic into bpf_selem_free(). It is a preparation work for a later patch using bpf_mem_cache_alloc/free. The other kfree(selem) cases are also changed to bpf_selem_free(..., reuse_now = true). Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20230308065936.1550103-10-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf_local_storage.h | 4 ++++ kernel/bpf/bpf_local_storage.c | 21 ++++++++++++++------- net/core/bpf_sk_storage.c | 2 +- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 18a31add22555..a34f61467a2f1 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -152,6 +152,10 @@ struct bpf_local_storage_elem * bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value, bool charge_mem, gfp_t gfp_flags); +void bpf_selem_free(struct bpf_local_storage_elem *selem, + struct bpf_local_storage_map *smap, + bool reuse_now); + int bpf_local_storage_alloc(void *owner, struct bpf_local_storage_map *smap, diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 146e9caeda963..512943aac4354 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -125,6 +125,17 @@ static void bpf_selem_free_trace_rcu(struct rcu_head *rcu) call_rcu(rcu, bpf_selem_free_rcu); } +void bpf_selem_free(struct bpf_local_storage_elem *selem, + struct bpf_local_storage_map *smap, + bool reuse_now) +{ + bpf_obj_free_fields(smap->map.record, SDATA(selem)->data); + if (!reuse_now) + call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_trace_rcu); + else + call_rcu(&selem->rcu, bpf_selem_free_rcu); +} + /* local_storage->lock must be held and selem->local_storage == local_storage. * The caller must ensure selem->smap is still valid to be * dereferenced for its smap->elem_size and smap->cache_idx. 
@@ -175,11 +186,7 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor SDATA(selem)) RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL); - bpf_obj_free_fields(smap->map.record, SDATA(selem)->data); - if (!reuse_now) - call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_trace_rcu); - else - call_rcu(&selem->rcu, bpf_selem_free_rcu); + bpf_selem_free(selem, smap, reuse_now); if (rcu_access_pointer(local_storage->smap) == smap) RCU_INIT_POINTER(local_storage->smap, NULL); @@ -423,7 +430,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, err = bpf_local_storage_alloc(owner, smap, selem, gfp_flags); if (err) { - kfree(selem); + bpf_selem_free(selem, smap, true); mem_uncharge(smap, owner, smap->elem_size); return ERR_PTR(err); } @@ -517,7 +524,7 @@ unlock_err: raw_spin_unlock_irqrestore(&local_storage->lock, flags); if (selem) { mem_uncharge(smap, owner, smap->elem_size); - kfree(selem); + bpf_selem_free(selem, smap, true); } return ERR_PTR(err); } diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index a5f185b8e50ad..24c3dc0d62e5a 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -197,7 +197,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk) } else { ret = bpf_local_storage_alloc(newsk, smap, copy_selem, GFP_ATOMIC); if (ret) { - kfree(copy_selem); + bpf_selem_free(copy_selem, smap, true); atomic_sub(smap->elem_size, &newsk->sk_omem_alloc); bpf_map_put(map); -- GitLab From 1288aaa2786b1e58c9e88e53f7654d520ebe0f3b Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 7 Mar 2023 22:59:29 -0800 Subject: [PATCH 0330/2078] bpf: Add bpf_local_storage_rcu callback The existing bpf_local_storage_free_rcu is renamed to bpf_local_storage_free_trace_rcu. A new bpf_local_storage_rcu callback is added to do the kfree instead of using kfree_rcu. It is a preparation work for a later patch using bpf_mem_cache_alloc/free. Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20230308065936.1550103-11-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_local_storage.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 512943aac4354..0fbc477c8b271 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -99,14 +99,19 @@ static void bpf_local_storage_free_rcu(struct rcu_head *rcu) { struct bpf_local_storage *local_storage; + local_storage = container_of(rcu, struct bpf_local_storage, rcu); + kfree(local_storage); +} + +static void bpf_local_storage_free_trace_rcu(struct rcu_head *rcu) +{ /* If RCU Tasks Trace grace period implies RCU grace period, do * kfree(), else do kfree_rcu(). 
*/ - local_storage = container_of(rcu, struct bpf_local_storage, rcu); if (rcu_trace_implies_rcu_gp()) - kfree(local_storage); + bpf_local_storage_free_rcu(rcu); else - kfree_rcu(local_storage, rcu); + call_rcu(rcu, bpf_local_storage_free_rcu); } static void bpf_selem_free_rcu(struct rcu_head *rcu) @@ -216,9 +221,9 @@ static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem, if (free_local_storage) { if (!reuse_now) call_rcu_tasks_trace(&local_storage->rcu, - bpf_local_storage_free_rcu); + bpf_local_storage_free_trace_rcu); else - kfree_rcu(local_storage, rcu); + call_rcu(&local_storage->rcu, bpf_local_storage_free_rcu); } } @@ -631,7 +636,7 @@ void bpf_local_storage_destroy(struct bpf_local_storage *local_storage) raw_spin_unlock_irqrestore(&local_storage->lock, flags); if (free_storage) - kfree_rcu(local_storage, rcu); + call_rcu(&local_storage->rcu, bpf_local_storage_free_rcu); } u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map) -- GitLab From 7e30a8477b0bdd13dfd0b24e4f32b26d22b96e6c Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 7 Mar 2023 22:59:30 -0800 Subject: [PATCH 0331/2078] bpf: Add bpf_local_storage_free() This patch refactors local_storage freeing logic into bpf_local_storage_free(). It is a preparation work for a later patch that uses bpf_mem_cache_alloc/free. The other kfree(local_storage) cases are also changed to bpf_local_storage_free(..., reuse_now = true). Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20230308065936.1550103-12-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_local_storage.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 0fbc477c8b271..351d991694cbe 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -114,6 +114,16 @@ static void bpf_local_storage_free_trace_rcu(struct rcu_head *rcu) call_rcu(rcu, bpf_local_storage_free_rcu); } +static void bpf_local_storage_free(struct bpf_local_storage *local_storage, + bool reuse_now) +{ + if (!reuse_now) + call_rcu_tasks_trace(&local_storage->rcu, + bpf_local_storage_free_trace_rcu); + else + call_rcu(&local_storage->rcu, bpf_local_storage_free_rcu); +} + static void bpf_selem_free_rcu(struct rcu_head *rcu) { struct bpf_local_storage_elem *selem; @@ -218,13 +228,8 @@ static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem, local_storage, selem, true, reuse_now); raw_spin_unlock_irqrestore(&local_storage->lock, flags); - if (free_local_storage) { - if (!reuse_now) - call_rcu_tasks_trace(&local_storage->rcu, - bpf_local_storage_free_trace_rcu); - else - call_rcu(&local_storage->rcu, bpf_local_storage_free_rcu); - } + if (free_local_storage) + bpf_local_storage_free(local_storage, reuse_now); } void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, @@ -391,7 +396,7 @@ int bpf_local_storage_alloc(void *owner, return 0; uncharge: - kfree(storage); + bpf_local_storage_free(storage, true); mem_uncharge(smap, owner, sizeof(*storage)); return err; } @@ -636,7 +641,7 @@ void bpf_local_storage_destroy(struct bpf_local_storage *local_storage) raw_spin_unlock_irqrestore(&local_storage->lock, flags); if (free_storage) - call_rcu(&local_storage->rcu, bpf_local_storage_free_rcu); + bpf_local_storage_free(local_storage, true); } u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map) -- GitLab From 57ef77152b58770cbd54d624babd8f5d90805ea7 Mon Sep 17 00:00:00 
2001 From: Martin KaFai Lau Date: Tue, 7 Mar 2023 22:59:34 -0800 Subject: [PATCH 0332/2078] selftests/bpf: Replace CHECK with ASSERT in test_local_storage This patch migrates the CHECK macro to ASSERT macro. Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20230308065936.1550103-16-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- .../bpf/prog_tests/test_local_storage.c | 47 +++++++------------ 1 file changed, 17 insertions(+), 30 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c index 9c77cd6b1eafb..563a9c746b7bc 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c @@ -13,8 +13,6 @@ #include "network_helpers.h" #include "task_local_storage_helpers.h" -static unsigned int duration; - #define TEST_STORAGE_VALUE 0xbeefdead struct storage { @@ -60,36 +58,30 @@ static bool check_syscall_operations(int map_fd, int obj_fd) /* Looking up an existing element should fail initially */ err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0); - if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem", - "err:%d errno:%d\n", err, errno)) + if (!ASSERT_EQ(err, -ENOENT, "bpf_map_lookup_elem")) return false; /* Create a new element */ err = bpf_map_update_elem(map_fd, &obj_fd, &val, BPF_NOEXIST); - if (CHECK(err < 0, "bpf_map_update_elem", "err:%d errno:%d\n", err, - errno)) + if (!ASSERT_OK(err, "bpf_map_update_elem")) return false; /* Lookup the newly created element */ err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0); - if (CHECK(err < 0, "bpf_map_lookup_elem", "err:%d errno:%d", err, - errno)) + if (!ASSERT_OK(err, "bpf_map_lookup_elem")) return false; /* Check the value of the newly created element */ - if (CHECK(lookup_val.value != val.value, "bpf_map_lookup_elem", - "value got = %x errno:%d", lookup_val.value, val.value)) + if (!ASSERT_EQ(lookup_val.value, val.value, "bpf_map_lookup_elem")) return false; err = bpf_map_delete_elem(map_fd, &obj_fd); - if (CHECK(err, "bpf_map_delete_elem()", "err:%d errno:%d\n", err, - errno)) + if (!ASSERT_OK(err, "bpf_map_delete_elem()")) return false; /* The lookup should fail, now that the element has been deleted */ err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0); - if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem", - "err:%d errno:%d\n", err, errno)) + if (!ASSERT_EQ(err, -ENOENT, "bpf_map_lookup_elem")) return false; return true; @@ -104,35 +96,32 @@ void test_test_local_storage(void) char cmd[256]; skel = local_storage__open_and_load(); - if (CHECK(!skel, "skel_load", "lsm skeleton failed\n")) + if (!ASSERT_OK_PTR(skel, "skel_load")) goto close_prog; err = local_storage__attach(skel); - if (CHECK(err, "attach", "lsm attach failed: %d\n", err)) + if (!ASSERT_OK(err, "attach")) goto close_prog; task_fd = sys_pidfd_open(getpid(), 0); - if (CHECK(task_fd < 0, "pidfd_open", - "failed to get pidfd err:%d, errno:%d", task_fd, errno)) + if (!ASSERT_GE(task_fd, 0, "pidfd_open")) goto close_prog; if (!check_syscall_operations(bpf_map__fd(skel->maps.task_storage_map), task_fd)) goto close_prog; - if (CHECK(!mkdtemp(tmp_dir_path), "mkdtemp", - "unable to create tmpdir: %d\n", errno)) + if (!ASSERT_OK_PTR(mkdtemp(tmp_dir_path), "mkdtemp")) goto close_prog; snprintf(tmp_exec_path, sizeof(tmp_exec_path), "%s/copy_of_rm", tmp_dir_path); snprintf(cmd, sizeof(cmd), "cp /bin/rm %s", tmp_exec_path); - if 
(CHECK_FAIL(system(cmd))) + if (!ASSERT_OK(system(cmd), "system(cp)")) goto close_prog_rmdir; rm_fd = open(tmp_exec_path, O_RDONLY); - if (CHECK(rm_fd < 0, "open", "failed to open %s err:%d, errno:%d", - tmp_exec_path, rm_fd, errno)) + if (!ASSERT_GE(rm_fd, 0, "open(tmp_exec_path)")) goto close_prog_rmdir; if (!check_syscall_operations(bpf_map__fd(skel->maps.inode_storage_map), @@ -145,7 +134,7 @@ void test_test_local_storage(void) * LSM program. */ err = run_self_unlink(&skel->bss->monitored_pid, tmp_exec_path); - if (CHECK(err != EPERM, "run_self_unlink", "err %d want EPERM\n", err)) + if (!ASSERT_EQ(err, EPERM, "run_self_unlink")) goto close_prog_rmdir; /* Set the process being monitored to be the current process */ @@ -156,18 +145,16 @@ void test_test_local_storage(void) */ snprintf(cmd, sizeof(cmd), "mv %s/copy_of_rm %s/check_null_ptr", tmp_dir_path, tmp_dir_path); - if (CHECK_FAIL(system(cmd))) + if (!ASSERT_OK(system(cmd), "system(mv)")) goto close_prog_rmdir; - CHECK(skel->data->inode_storage_result != 0, "inode_storage_result", - "inode_local_storage not set\n"); + ASSERT_EQ(skel->data->inode_storage_result, 0, "inode_storage_result"); serv_sk = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0); - if (CHECK(serv_sk < 0, "start_server", "failed to start server\n")) + if (!ASSERT_GE(serv_sk, 0, "start_server")) goto close_prog_rmdir; - CHECK(skel->data->sk_storage_result != 0, "sk_storage_result", - "sk_local_storage not set\n"); + ASSERT_EQ(skel->data->sk_storage_result, 0, "sk_storage_result"); if (!check_syscall_operations(bpf_map__fd(skel->maps.sk_storage_map), serv_sk)) -- GitLab From 1f443d0f2b5702bad5f03aab544858ddd33999b7 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 7 Mar 2023 22:59:35 -0800 Subject: [PATCH 0333/2078] selftests/bpf: Check freeing sk->sk_local_storage with sk_local_storage->smap is NULL This patch tweaks the socket_bind bpf prog to test the local_storage->smap == NULL case in the bpf_local_storage_free() code path. The idea is to create the local_storage with the sk_storage_map's selem first. Then add the sk_storage_map2's selem and then delete the earlier sk_storage_map's selem. Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20230308065936.1550103-17-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/local_storage.c | 29 +++++++++++++------ 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c index 01c74bc870aea..c8ba7207f5a58 100644 --- a/tools/testing/selftests/bpf/progs/local_storage.c +++ b/tools/testing/selftests/bpf/progs/local_storage.c @@ -108,18 +108,17 @@ int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address, { __u32 pid = bpf_get_current_pid_tgid() >> 32; struct local_storage *storage; - int err; if (pid != monitored_pid) return 0; - storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0, - BPF_LOCAL_STORAGE_GET_F_CREATE); + storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0, 0); if (!storage) return 0; + sk_storage_result = -1; if (storage->value != DUMMY_STORAGE_VALUE) - sk_storage_result = -1; + return 0; /* This tests that we can associate multiple elements * with the local storage.
@@ -129,14 +128,26 @@ int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address, if (!storage) return 0; - err = bpf_sk_storage_delete(&sk_storage_map, sock->sk); - if (err) + if (bpf_sk_storage_delete(&sk_storage_map2, sock->sk)) return 0; - err = bpf_sk_storage_delete(&sk_storage_map2, sock->sk); - if (!err) - sk_storage_result = err; + storage = bpf_sk_storage_get(&sk_storage_map2, sock->sk, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!storage) + return 0; + + if (bpf_sk_storage_delete(&sk_storage_map, sock->sk)) + return 0; + + /* Ensure that the sk_storage_map is disconnected from the storage. + * The storage memory should not be freed back to the + * bpf_mem_alloc of the sk_bpf_storage_map because + * sk_bpf_storage_map may have been gone. + */ + if (!sock->sk->sk_bpf_storage || sock->sk->sk_bpf_storage->smap) + return 0; + sk_storage_result = 0; return 0; } -- GitLab From 4659fba121dac21a3516986a3c2cf8459c7ac3bc Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 7 Mar 2023 22:59:36 -0800 Subject: [PATCH 0334/2078] selftests/bpf: Add local-storage-create benchmark MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch tests how many kmallocs is needed to create and free a batch of UDP sockets and each socket has a 64bytes bpf storage. It also measures how fast the UDP sockets can be created. The result is from my qemu setup. Before bpf_mem_cache_alloc/free: ./bench -p 1 local-storage-create Setting up benchmark 'local-storage-create'... Benchmark 'local-storage-create' started. Iter 0 ( 73.193us): creates 213.552k/s (213.552k/prod), 3.09 kmallocs/create Iter 1 (-20.724us): creates 211.908k/s (211.908k/prod), 3.09 kmallocs/create Iter 2 ( 9.280us): creates 212.574k/s (212.574k/prod), 3.12 kmallocs/create Iter 3 ( 11.039us): creates 213.209k/s (213.209k/prod), 3.12 kmallocs/create Iter 4 (-11.411us): creates 213.351k/s (213.351k/prod), 3.12 kmallocs/create Iter 5 ( -7.915us): creates 214.754k/s (214.754k/prod), 3.12 kmallocs/create Iter 6 ( 11.317us): creates 210.942k/s (210.942k/prod), 3.12 kmallocs/create Summary: creates 212.789 ± 1.310k/s (212.789k/prod), 3.12 kmallocs/create After bpf_mem_cache_alloc/free: ./bench -p 1 local-storage-create Setting up benchmark 'local-storage-create'... Benchmark 'local-storage-create' started. 
Iter 0 ( 68.265us): creates 243.984k/s (243.984k/prod), 1.04 kmallocs/create Iter 1 ( 30.357us): creates 238.424k/s (238.424k/prod), 1.04 kmallocs/create Iter 2 (-18.712us): creates 232.963k/s (232.963k/prod), 1.04 kmallocs/create Iter 3 (-15.885us): creates 238.879k/s (238.879k/prod), 1.04 kmallocs/create Iter 4 ( 5.590us): creates 237.490k/s (237.490k/prod), 1.04 kmallocs/create Iter 5 ( 8.577us): creates 237.521k/s (237.521k/prod), 1.04 kmallocs/create Iter 6 ( -6.263us): creates 238.508k/s (238.508k/prod), 1.04 kmallocs/create Summary: creates 237.298 ± 2.198k/s (237.298k/prod), 1.04 kmallocs/create Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20230308065936.1550103-18-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 2 + tools/testing/selftests/bpf/bench.c | 2 + .../bpf/benchs/bench_local_storage_create.c | 141 ++++++++++++++++++ .../bpf/progs/bench_local_storage_create.c | 57 +++++++ 4 files changed, 202 insertions(+) create mode 100644 tools/testing/selftests/bpf/benchs/bench_local_storage_create.c create mode 100644 tools/testing/selftests/bpf/progs/bench_local_storage_create.c diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 606e2d738dd84..55811c448eb78 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -639,6 +639,7 @@ $(OUTPUT)/bench_strncmp.o: $(OUTPUT)/strncmp_bench.skel.h $(OUTPUT)/bench_bpf_hashmap_full_update.o: $(OUTPUT)/bpf_hashmap_full_update_bench.skel.h $(OUTPUT)/bench_local_storage.o: $(OUTPUT)/local_storage_bench.skel.h $(OUTPUT)/bench_local_storage_rcu_tasks_trace.o: $(OUTPUT)/local_storage_rcu_tasks_trace_bench.skel.h +$(OUTPUT)/bench_local_storage_create.o: $(OUTPUT)/bench_local_storage_create.skel.h $(OUTPUT)/bench_bpf_hashmap_lookup.o: $(OUTPUT)/bpf_hashmap_lookup.skel.h $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ) $(OUTPUT)/bench: LDLIBS += -lm @@ -656,6 +657,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \ $(OUTPUT)/bench_local_storage.o \ $(OUTPUT)/bench_local_storage_rcu_tasks_trace.o \ $(OUTPUT)/bench_bpf_hashmap_lookup.o \ + $(OUTPUT)/bench_local_storage_create.o \ # $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index 0b2a53bb84609..dc3827c1f139c 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -515,6 +515,7 @@ extern const struct bench bench_local_storage_cache_interleaved_get; extern const struct bench bench_local_storage_cache_hashmap_control; extern const struct bench bench_local_storage_tasks_trace; extern const struct bench bench_bpf_hashmap_lookup; +extern const struct bench bench_local_storage_create; static const struct bench *benchs[] = { &bench_count_global, @@ -555,6 +556,7 @@ static const struct bench *benchs[] = { &bench_local_storage_cache_hashmap_control, &bench_local_storage_tasks_trace, &bench_bpf_hashmap_lookup, + &bench_local_storage_create, }; static void find_benchmark(void) diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c new file mode 100644 index 0000000000000..f8b2a640ccbeb --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
*/ + +#include +#include + +#include "bench.h" +#include "bench_local_storage_create.skel.h" + +#define BATCH_SZ 32 + +struct thread { + int fds[BATCH_SZ]; +}; + +static struct bench_local_storage_create *skel; +static struct thread *threads; +static long socket_errs; + +static void validate(void) +{ + if (env.consumer_cnt > 1) { + fprintf(stderr, + "local-storage-create benchmark does not need consumer\n"); + exit(1); + } +} + +static void setup(void) +{ + skel = bench_local_storage_create__open_and_load(); + if (!skel) { + fprintf(stderr, "error loading skel\n"); + exit(1); + } + + skel->bss->bench_pid = getpid(); + + if (!bpf_program__attach(skel->progs.socket_post_create)) { + fprintf(stderr, "Error attaching bpf program\n"); + exit(1); + } + + if (!bpf_program__attach(skel->progs.kmalloc)) { + fprintf(stderr, "Error attaching bpf program\n"); + exit(1); + } + + threads = calloc(env.producer_cnt, sizeof(*threads)); + + if (!threads) { + fprintf(stderr, "cannot alloc thread_res\n"); + exit(1); + } +} + +static void measure(struct bench_res *res) +{ + res->hits = atomic_swap(&skel->bss->create_cnts, 0); + res->drops = atomic_swap(&skel->bss->kmalloc_cnts, 0); +} + +static void *consumer(void *input) +{ + return NULL; +} + +static void *producer(void *input) +{ + struct thread *t = &threads[(long)(input)]; + int *fds = t->fds; + int i; + + while (true) { + for (i = 0; i < BATCH_SZ; i++) { + fds[i] = socket(AF_INET6, SOCK_DGRAM, 0); + if (fds[i] == -1) + atomic_inc(&socket_errs); + } + + for (i = 0; i < BATCH_SZ; i++) { + if (fds[i] != -1) + close(fds[i]); + } + } + + return NULL; +} + +static void report_progress(int iter, struct bench_res *res, long delta_ns) +{ + double creates_per_sec, kmallocs_per_create; + + creates_per_sec = res->hits / 1000.0 / (delta_ns / 1000000000.0); + kmallocs_per_create = (double)res->drops / res->hits; + + printf("Iter %3d (%7.3lfus): ", + iter, (delta_ns - 1000000000) / 1000.0); + printf("creates %8.3lfk/s (%7.3lfk/prod), ", + creates_per_sec, creates_per_sec / env.producer_cnt); + printf("%3.2lf kmallocs/create\n", kmallocs_per_create); +} + +static void report_final(struct bench_res res[], int res_cnt) +{ + double creates_mean = 0.0, creates_stddev = 0.0; + long total_creates = 0, total_kmallocs = 0; + int i; + + for (i = 0; i < res_cnt; i++) { + creates_mean += res[i].hits / 1000.0 / (0.0 + res_cnt); + total_creates += res[i].hits; + total_kmallocs += res[i].drops; + } + + if (res_cnt > 1) { + for (i = 0; i < res_cnt; i++) + creates_stddev += (creates_mean - res[i].hits / 1000.0) * + (creates_mean - res[i].hits / 1000.0) / + (res_cnt - 1.0); + creates_stddev = sqrt(creates_stddev); + } + printf("Summary: creates %8.3lf \u00B1 %5.3lfk/s (%7.3lfk/prod), ", + creates_mean, creates_stddev, creates_mean / env.producer_cnt); + printf("%4.2lf kmallocs/create\n", (double)total_kmallocs / total_creates); + if (socket_errs || skel->bss->create_errs) + printf("socket() errors %ld create_errs %ld\n", socket_errs, + skel->bss->create_errs); +} + +/* Benchmark performance of creating bpf local storage */ +const struct bench bench_local_storage_create = { + .name = "local-storage-create", + .validate = validate, + .setup = setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = report_progress, + .report_final = report_final, +}; diff --git a/tools/testing/selftests/bpf/progs/bench_local_storage_create.c b/tools/testing/selftests/bpf/progs/bench_local_storage_create.c new file mode 100644 index 
0000000000000..2814bab54d28f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bench_local_storage_create.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include "bpf_tracing_net.h" +#include +#include + +long create_errs = 0; +long create_cnts = 0; +long kmalloc_cnts = 0; +__u32 bench_pid = 0; + +struct storage { + __u8 data[64]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct storage); +} sk_storage_map SEC(".maps"); + +SEC("raw_tp/kmalloc") +int BPF_PROG(kmalloc, unsigned long call_site, const void *ptr, + size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags, + int node) +{ + __sync_fetch_and_add(&kmalloc_cnts, 1); + + return 0; +} + +SEC("lsm.s/socket_post_create") +int BPF_PROG(socket_post_create, struct socket *sock, int family, int type, + int protocol, int kern) +{ + struct storage *stg; + __u32 pid; + + pid = bpf_get_current_pid_tgid() >> 32; + if (pid != bench_pid) + return 0; + + stg = bpf_sk_storage_get(&sk_storage_map, sock->sk, NULL, + BPF_LOCAL_STORAGE_GET_F_CREATE); + + if (stg) + __sync_fetch_and_add(&create_cnts, 1); + else + __sync_fetch_and_add(&create_errs, 1); + + return 0; +} + +char __license[] SEC("license") = "GPL"; -- GitLab From b32a5dae44cc7346835839c2e92356ff4609c823 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Thu, 9 Mar 2023 10:01:06 -0800 Subject: [PATCH 0335/2078] bpf: verifier: Rename kernel_type_name helper to btf_type_name kernel_type_name was introduced in commit 9e15db66136a ("bpf: Implement accurate raw_tp context access via BTF") with type signature: const char *kernel_type_name(u32 id) At that time the function used global btf_vmlinux BTF for all id lookups. Later, in commit 22dc4a0f5ed1 ("bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier"), the type signature was changed to: static const char *kernel_type_name(const struct btf* btf, u32 id) With the btf parameter used for lookups instead of global btf_vmlinux. The helper will function as expected for type name lookup using non-kernel BTFs, and will be used for such in further patches in the series. Let's rename it to avoid incorrect assumptions that might arise when seeing the current name. 
Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20230309180111.1618459-2-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 0aaf7703326b0..073c56dbec63e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -752,7 +752,7 @@ static int iter_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg, return stack_slot_obj_get_spi(env, reg, "iter", nr_slots); } -static const char *kernel_type_name(const struct btf* btf, u32 id) +static const char *btf_type_name(const struct btf *btf, u32 id) { return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off); } @@ -782,7 +782,7 @@ static const char *iter_type_str(const struct btf *btf, u32 btf_id) return ""; /* we already validated that type is valid and has conforming name */ - return kernel_type_name(btf, btf_id) + sizeof(ITER_PREFIX) - 1; + return btf_type_name(btf, btf_id) + sizeof(ITER_PREFIX) - 1; } static const char *iter_state_str(enum bpf_iter_state state) @@ -1349,7 +1349,7 @@ static void print_verifier_state(struct bpf_verifier_env *env, verbose(env, "%s", reg_type_str(env, t)); if (base_type(t) == PTR_TO_BTF_ID) - verbose(env, "%s", kernel_type_name(reg->btf, reg->btf_id)); + verbose(env, "%s", btf_type_name(reg->btf, reg->btf_id)); verbose(env, "("); /* * _a stands for append, was shortened to avoid multiline statements below. @@ -4518,7 +4518,7 @@ static int map_kptr_match_type(struct bpf_verifier_env *env, struct btf_field *kptr_field, struct bpf_reg_state *reg, u32 regno) { - const char *targ_name = kernel_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id); + const char *targ_name = btf_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id); int perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED | MEM_RCU; const char *reg_name = ""; @@ -4534,7 +4534,7 @@ static int map_kptr_match_type(struct bpf_verifier_env *env, return -EINVAL; } /* We need to verify reg->type and reg->btf, before accessing reg->btf */ - reg_name = kernel_type_name(reg->btf, reg->btf_id); + reg_name = btf_type_name(reg->btf, reg->btf_id); /* For ref_ptr case, release function check should ensure we get one * referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the @@ -7177,8 +7177,8 @@ found: btf_vmlinux, *arg_btf_id, strict_type_match)) { verbose(env, "R%d is of type %s but %s is expected\n", - regno, kernel_type_name(reg->btf, reg->btf_id), - kernel_type_name(btf_vmlinux, *arg_btf_id)); + regno, btf_type_name(reg->btf, reg->btf_id), + btf_type_name(btf_vmlinux, *arg_btf_id)); return -EACCES; } } @@ -7248,7 +7248,7 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env, verbose(env, "R%d must have zero offset when passed to release func\n", regno); verbose(env, "No graph node or root found at R%d type:%s off:%d\n", regno, - kernel_type_name(reg->btf, reg->btf_id), reg->off); + btf_type_name(reg->btf, reg->btf_id), reg->off); return -EINVAL; } -- GitLab From a4aa38897b6a6dad4318bed036edc7ed0c8a4578 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Thu, 9 Mar 2023 10:01:07 -0800 Subject: [PATCH 0336/2078] bpf: btf: Remove unused btf_field_info_type enum This enum was added and used in commit aa3496accc41 ("bpf: Refactor kptr_off_tab into btf_record"). Later refactoring in commit db559117828d ("bpf: Consolidate spin_lock, timer management into btf_record") resulted in the enum values no longer being used anywhere. Let's remove them. 
Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20230309180111.1618459-3-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/btf.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 71758cd15b070..37779ceefd09f 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3231,12 +3231,6 @@ static void btf_struct_log(struct btf_verifier_env *env, btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t)); } -enum btf_field_info_type { - BTF_FIELD_SPIN_LOCK, - BTF_FIELD_TIMER, - BTF_FIELD_KPTR, -}; - enum { BTF_FIELD_IGNORE = 0, BTF_FIELD_FOUND = 1, -- GitLab From 74843b57ec70af7b67b7e6153374834ee18d139f Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Thu, 9 Mar 2023 10:01:08 -0800 Subject: [PATCH 0337/2078] bpf: Change btf_record_find enum parameter to field_mask btf_record_find's 3rd parameter can be multiple enum btf_field_type's masked together. The function is called with BPF_KPTR in two places in verifier.c, so it works with masked values already. Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20230309180111.1618459-4-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 +- kernel/bpf/syscall.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e64ff1e89fb2a..3a38db315f7f6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1925,7 +1925,7 @@ void bpf_prog_free_id(struct bpf_prog *prog); void bpf_map_free_id(struct bpf_map *map); struct btf_field *btf_record_find(const struct btf_record *rec, - u32 offset, enum btf_field_type type); + u32 offset, u32 field_mask); void btf_record_free(struct btf_record *rec); void bpf_map_free_record(struct bpf_map *map); struct btf_record *btf_record_dup(const struct btf_record *rec); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index f406dfa137924..cc4b7684910cb 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -520,14 +520,14 @@ static int btf_field_cmp(const void *a, const void *b) } struct btf_field *btf_record_find(const struct btf_record *rec, u32 offset, - enum btf_field_type type) + u32 field_mask) { struct btf_field *field; - if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & type)) + if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & field_mask)) return NULL; field = bsearch(&offset, rec->fields, rec->cnt, sizeof(rec->fields[0]), btf_field_cmp); - if (!field || !(field->type & type)) + if (!field || !(field->type & field_mask)) return NULL; return field; } -- GitLab From c1f9e14e3b676eb88fe1c9488c0b5f4fc9108a1c Mon Sep 17 00:00:00 2001 From: Dave Thaler Date: Wed, 8 Mar 2023 20:53:03 +0000 Subject: [PATCH 0338/2078] bpf, docs: Explain helper functions Add brief text about existence of helper functions, with details to go in separate psABI text. Note that text about runtime functions (kfuncs) is part of a separate patch, not this one. 
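As an editorial aside, not part of this patch: the "helper functions" concept the new documentation text describes is easiest to see from the source side. In BPF C a helper is called like an ordinary function, and the compiler lowers each call to a BPF_JMP | BPF_CALL instruction whose immediate field carries the helper's integer ID. The following minimal sketch (assumed to be built with clang -O2 -target bpf) uses the well-known bpf_get_current_pid_tgid() and bpf_printk() helpers:

// SPDX-License-Identifier: GPL-2.0
/* Editorial sketch only: each helper call below is compiled to a
 * BPF_JMP | BPF_CALL instruction with the helper ID in the immediate.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("tracepoint/syscalls/sys_enter_execve")
int show_helper_call(void *ctx)
{
	__u64 pid_tgid = bpf_get_current_pid_tgid();

	bpf_printk("execve by tgid %u", (__u32)(pid_tgid >> 32));
	return 0;
}

char _license[] SEC("license") = "GPL";

The register-based call form mentioned in the linux-notes.rst hunk of the diff below (BPF_CALL | BPF_X | BPF_JMP) is the variant the verifier currently rejects.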
Signed-off-by: Dave Thaler Link: https://lore.kernel.org/r/20230308205303.1308-1-dthaler1968@googlemail.com Signed-off-by: Alexei Starovoitov --- Documentation/bpf/clang-notes.rst | 6 ++++++ Documentation/bpf/instruction-set.rst | 9 ++++++++- Documentation/bpf/linux-notes.rst | 8 ++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/Documentation/bpf/clang-notes.rst b/Documentation/bpf/clang-notes.rst index 528feddf2db90..2c872a1ee08e9 100644 --- a/Documentation/bpf/clang-notes.rst +++ b/Documentation/bpf/clang-notes.rst @@ -20,6 +20,12 @@ Arithmetic instructions For CPU versions prior to 3, Clang v7.0 and later can enable ``BPF_ALU`` support with ``-Xclang -target-feature -Xclang +alu32``. In CPU version 3, support is automatically included. +Jump instructions +================= + +If ``-O0`` is used, Clang will generate the ``BPF_CALL | BPF_X | BPF_JMP`` (0x8d) +instruction, which is not supported by the Linux kernel verifier. + Atomic operations ================= diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/instruction-set.rst index db8789e6969ea..5e43e14abe80e 100644 --- a/Documentation/bpf/instruction-set.rst +++ b/Documentation/bpf/instruction-set.rst @@ -253,7 +253,7 @@ BPF_JSET 0x40 PC += off if dst & src BPF_JNE 0x50 PC += off if dst != src BPF_JSGT 0x60 PC += off if dst > src signed BPF_JSGE 0x70 PC += off if dst >= src signed -BPF_CALL 0x80 function call +BPF_CALL 0x80 function call see `Helper functions`_ BPF_EXIT 0x90 function / program return BPF_JMP only BPF_JLT 0xa0 PC += off if dst < src unsigned BPF_JLE 0xb0 PC += off if dst <= src unsigned @@ -264,6 +264,13 @@ BPF_JSLE 0xd0 PC += off if dst <= src signed The eBPF program needs to store the return value into register R0 before doing a BPF_EXIT. +Helper functions +~~~~~~~~~~~~~~~~ + +Helper functions are a concept whereby BPF programs can call into a +set of function calls exposed by the runtime. Each helper +function is identified by an integer used in a ``BPF_CALL`` instruction. +The available helper functions may differ for each program type. Load and store instructions =========================== diff --git a/Documentation/bpf/linux-notes.rst b/Documentation/bpf/linux-notes.rst index 956b0c86699d0..f43b9c797bcbe 100644 --- a/Documentation/bpf/linux-notes.rst +++ b/Documentation/bpf/linux-notes.rst @@ -12,6 +12,14 @@ Byte swap instructions ``BPF_FROM_LE`` and ``BPF_FROM_BE`` exist as aliases for ``BPF_TO_LE`` and ``BPF_TO_BE`` respectively. +Jump instructions +================= + +``BPF_CALL | BPF_X | BPF_JMP`` (0x8d), where the helper function +integer would be read from a specified register, is not currently supported +by the verifier. Any programs with this instruction will fail to load +until such support is added. + Legacy BPF Packet access instructions ===================================== -- GitLab From cc4342f60f1a6d0f4a30ae1887a75834d0109444 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 7 Mar 2023 20:29:27 +0100 Subject: [PATCH 0339/2078] net: mvpp2: Defer probe if MAC address source is not yet ready NVMEM layouts are no longer registered early, and thus may not yet be available when Ethernet drivers (or any other consumer) probe, leading to possible probe deferral errors. Forward the error code if this happens. All other errors being discarded, the driver will eventually use a random MAC address if no other source was considered valid (no functional change in this regard).
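Schematically (an editorial sketch with a made-up probe function, not code from this patch; the real change is in the mvpp2 diff below), the pattern is to propagate only -EPROBE_DEFER from the NVMEM-backed MAC lookup and to keep treating every other failure as "fall back to a random address":

/* Sketch only; assumes <linux/etherdevice.h>, <linux/of.h>, <linux/platform_device.h> */
static int example_port_probe(struct platform_device *pdev)
{
	u8 addr[ETH_ALEN];
	int ret;

	ret = of_get_mac_address_nvmem(pdev->dev.of_node, addr);
	if (ret == -EPROBE_DEFER)
		return ret;		/* NVMEM provider not ready yet, let the core retry */
	if (ret)
		eth_random_addr(addr);	/* any other error: random MAC, as before */

	/* ... go on to register the netdev with addr ... */
	return 0;
}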
Signed-off-by: Miquel Raynal Reviewed-by: Marcin Wojtas Link: https://lore.kernel.org/r/20230307192927.512757-1-miquel.raynal@bootlin.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/marvell/mvpp2/mvpp2_main.c | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 9b4ecbe4f36d4..e7c7652ffac57 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -6081,18 +6081,19 @@ static bool mvpp2_port_has_irqs(struct mvpp2 *priv, return true; } -static void mvpp2_port_copy_mac_addr(struct net_device *dev, struct mvpp2 *priv, - struct fwnode_handle *fwnode, - char **mac_from) +static int mvpp2_port_copy_mac_addr(struct net_device *dev, struct mvpp2 *priv, + struct fwnode_handle *fwnode, + char **mac_from) { struct mvpp2_port *port = netdev_priv(dev); char hw_mac_addr[ETH_ALEN] = {0}; char fw_mac_addr[ETH_ALEN]; + int ret; if (!fwnode_get_mac_address(fwnode, fw_mac_addr)) { *mac_from = "firmware node"; eth_hw_addr_set(dev, fw_mac_addr); - return; + return 0; } if (priv->hw_version == MVPP21) { @@ -6100,19 +6101,24 @@ static void mvpp2_port_copy_mac_addr(struct net_device *dev, struct mvpp2 *priv, if (is_valid_ether_addr(hw_mac_addr)) { *mac_from = "hardware"; eth_hw_addr_set(dev, hw_mac_addr); - return; + return 0; } } /* Only valid on OF enabled platforms */ - if (!of_get_mac_address_nvmem(to_of_node(fwnode), fw_mac_addr)) { + ret = of_get_mac_address_nvmem(to_of_node(fwnode), fw_mac_addr); + if (ret == -EPROBE_DEFER) + return ret; + if (!ret) { *mac_from = "nvmem cell"; eth_hw_addr_set(dev, fw_mac_addr); - return; + return 0; } *mac_from = "random"; eth_hw_addr_random(dev); + + return 0; } static struct mvpp2_port *mvpp2_phylink_to_port(struct phylink_config *config) @@ -6815,7 +6821,9 @@ static int mvpp2_port_probe(struct platform_device *pdev, mutex_init(&port->gather_stats_lock); INIT_DELAYED_WORK(&port->stats_work, mvpp2_gather_hw_statistics); - mvpp2_port_copy_mac_addr(dev, priv, port_fwnode, &mac_from); + err = mvpp2_port_copy_mac_addr(dev, priv, port_fwnode, &mac_from); + if (err < 0) + goto err_free_stats; port->tx_ring_size = MVPP2_MAX_TXD_DFLT; port->rx_ring_size = MVPP2_MAX_RXD_DFLT; -- GitLab From c8e18754091479fac3f5b6c053c6bc4be0b7fb11 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Fri, 10 Mar 2023 15:07:41 -0800 Subject: [PATCH 0340/2078] bpf: Support __kptr to local kptrs If a PTR_TO_BTF_ID type comes from program BTF - not vmlinux or module BTF - it must have been allocated by bpf_obj_new and therefore must be free'd with bpf_obj_drop. Such a PTR_TO_BTF_ID is considered a "local kptr" and is tagged with MEM_ALLOC type tag by bpf_obj_new. This patch adds support for treating __kptr-tagged pointers to "local kptrs" as having an implicit bpf_obj_drop destructor for referenced kptr acquire / release semantics. Consider the following example: struct node_data { long key; long data; struct bpf_rb_node node; }; struct map_value { struct node_data __kptr *node; }; struct { __uint(type, BPF_MAP_TYPE_ARRAY); __type(key, int); __type(value, struct map_value); __uint(max_entries, 1); } some_nodes SEC(".maps"); If struct node_data had a matching definition in kernel BTF, the verifier would expect a destructor for the type to be registered. 
Since struct node_data does not match any type in kernel BTF, the verifier knows that there is no kfunc that provides a PTR_TO_BTF_ID to this type, and that such a PTR_TO_BTF_ID can only come from bpf_obj_new. So instead of searching for a registered dtor, a bpf_obj_drop dtor can be assumed. This allows the runtime to properly destruct such kptrs in bpf_obj_free_fields, which enables maps to clean up map_vals w/ such kptrs when going away. Implementation notes: * "kernel_btf" variable is renamed to "kptr_btf" in btf_parse_kptr. Before this patch, the variable would only ever point to vmlinux or module BTFs, but now it can point to some program BTF for local kptr type. It's later used to populate the (btf, btf_id) pair in kptr btf field. * It's necessary to btf_get the program BTF when populating btf_field for local kptr. btf_record_free later does a btf_put. * Behavior for non-local referenced kptrs is not modified, as bpf_find_btf_id helper only searches vmlinux and module BTFs for matching BTF type. If such a type is found, btf_field_kptr's btf will pass btf_is_kernel check, and the associated release function is some one-argument dtor. If btf_is_kernel check fails, associated release function is two-arg bpf_obj_drop_impl. Before this patch only btf_field_kptr's w/ kernel or module BTFs were created. Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20230310230743.2320707-2-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 11 ++++++++++- include/linux/btf.h | 2 -- kernel/bpf/btf.c | 37 ++++++++++++++++++++++++++++--------- kernel/bpf/helpers.c | 11 ++++++++--- kernel/bpf/syscall.c | 14 +++++++++++++- 5 files changed, 59 insertions(+), 16 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3a38db315f7f6..756b85f0d0d38 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -189,10 +189,19 @@ enum btf_field_type { BPF_RB_NODE | BPF_RB_ROOT, }; +typedef void (*btf_dtor_kfunc_t)(void *); +typedef void (*btf_dtor_obj_drop)(void *, const struct btf_record *); + struct btf_field_kptr { struct btf *btf; struct module *module; - btf_dtor_kfunc_t dtor; + union { + /* dtor used if btf_is_kernel(btf), otherwise the type + * is program-allocated and obj_drop is used + */ + btf_dtor_kfunc_t dtor; + btf_dtor_obj_drop obj_drop; + }; u32 btf_id; }; diff --git a/include/linux/btf.h b/include/linux/btf.h index 1bba0827e8c46..d53b10cc55f2c 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -121,8 +121,6 @@ struct btf_struct_metas { struct btf_struct_meta types[]; }; -typedef void (*btf_dtor_kfunc_t)(void *); - extern const struct file_operations btf_fops; void btf_get(struct btf *btf); diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 37779ceefd09f..66fad7a16b6cf 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3551,12 +3551,17 @@ static int btf_find_field(const struct btf *btf, const struct btf_type *t, return -EINVAL; } +extern void __bpf_obj_drop_impl(void *p, const struct btf_record *rec); + static int btf_parse_kptr(const struct btf *btf, struct btf_field *field, struct btf_field_info *info) { struct module *mod = NULL; const struct btf_type *t; - struct btf *kernel_btf; + /* If a matching btf type is found in kernel or module BTFs, kptr_ref + * is that BTF, otherwise it's program BTF + */ + struct btf *kptr_btf; int ret; s32 id; @@ -3565,7 +3570,20 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field, */ t = btf_type_by_id(btf, info->kptr.type_id); id = 
bpf_find_btf_id(__btf_name_by_offset(btf, t->name_off), BTF_INFO_KIND(t->info), - &kernel_btf); + &kptr_btf); + if (id == -ENOENT) { + /* btf_parse_kptr should only be called w/ btf = program BTF */ + WARN_ON_ONCE(btf_is_kernel(btf)); + + /* Type exists only in program BTF. Assume that it's a MEM_ALLOC + * kptr allocated via bpf_obj_new + */ + field->kptr.dtor = (void *)&__bpf_obj_drop_impl; + id = info->kptr.type_id; + kptr_btf = (struct btf *)btf; + btf_get(kptr_btf); + goto found_dtor; + } if (id < 0) return id; @@ -3582,20 +3600,20 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field, * can be used as a referenced pointer and be stored in a map at * the same time. */ - dtor_btf_id = btf_find_dtor_kfunc(kernel_btf, id); + dtor_btf_id = btf_find_dtor_kfunc(kptr_btf, id); if (dtor_btf_id < 0) { ret = dtor_btf_id; goto end_btf; } - dtor_func = btf_type_by_id(kernel_btf, dtor_btf_id); + dtor_func = btf_type_by_id(kptr_btf, dtor_btf_id); if (!dtor_func) { ret = -ENOENT; goto end_btf; } - if (btf_is_module(kernel_btf)) { - mod = btf_try_get_module(kernel_btf); + if (btf_is_module(kptr_btf)) { + mod = btf_try_get_module(kptr_btf); if (!mod) { ret = -ENXIO; goto end_btf; @@ -3605,7 +3623,7 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field, /* We already verified dtor_func to be btf_type_is_func * in register_btf_id_dtor_kfuncs. */ - dtor_func_name = __btf_name_by_offset(kernel_btf, dtor_func->name_off); + dtor_func_name = __btf_name_by_offset(kptr_btf, dtor_func->name_off); addr = kallsyms_lookup_name(dtor_func_name); if (!addr) { ret = -EINVAL; @@ -3614,14 +3632,15 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field, field->kptr.dtor = (void *)addr; } +found_dtor: field->kptr.btf_id = id; - field->kptr.btf = kernel_btf; + field->kptr.btf = kptr_btf; field->kptr.module = mod; return 0; end_mod: module_put(mod); end_btf: - btf_put(kernel_btf); + btf_put(kptr_btf); return ret; } diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index f9b7eeedce08b..77d64b6951b9c 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1896,14 +1896,19 @@ __bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign) return p; } +void __bpf_obj_drop_impl(void *p, const struct btf_record *rec) +{ + if (rec) + bpf_obj_free_fields(rec, p); + bpf_mem_free(&bpf_global_ma, p); +} + __bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign) { struct btf_struct_meta *meta = meta__ign; void *p = p__alloc; - if (meta) - bpf_obj_free_fields(meta->record, p); - bpf_mem_free(&bpf_global_ma, p); + __bpf_obj_drop_impl(p, meta ? 
meta->record : NULL); } static void __bpf_list_add(struct bpf_list_node *node, struct bpf_list_head *head, bool tail) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index cc4b7684910cb..0684febc447a8 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -659,8 +659,10 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj) return; fields = rec->fields; for (i = 0; i < rec->cnt; i++) { + struct btf_struct_meta *pointee_struct_meta; const struct btf_field *field = &fields[i]; void *field_ptr = obj + field->offset; + void *xchgd_field; switch (fields[i].type) { case BPF_SPIN_LOCK: @@ -672,7 +674,17 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj) WRITE_ONCE(*(u64 *)field_ptr, 0); break; case BPF_KPTR_REF: - field->kptr.dtor((void *)xchg((unsigned long *)field_ptr, 0)); + xchgd_field = (void *)xchg((unsigned long *)field_ptr, 0); + if (!btf_is_kernel(field->kptr.btf)) { + pointee_struct_meta = btf_find_struct_meta(field->kptr.btf, + field->kptr.btf_id); + WARN_ON_ONCE(!pointee_struct_meta); + field->kptr.obj_drop(xchgd_field, pointee_struct_meta ? + pointee_struct_meta->record : + NULL); + } else { + field->kptr.dtor(xchgd_field); + } break; case BPF_LIST_HEAD: if (WARN_ON_ONCE(rec->spin_lock_off < 0)) -- GitLab From 738c96d5e2e3700b370f02ac84a9dc159ca81f25 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Fri, 10 Mar 2023 15:07:42 -0800 Subject: [PATCH 0341/2078] bpf: Allow local kptrs to be exchanged via bpf_kptr_xchg The previous patch added necessary plumbing for verifier and runtime to know what to do with non-kernel PTR_TO_BTF_IDs in map values, but didn't provide any way to get such local kptrs into a map value. This patch modifies verifier handling of bpf_kptr_xchg to allow MEM_ALLOC kptr types. check_reg_type is modified to accept MEM_ALLOC-flagged input to bpf_kptr_xchg despite such types not being in btf_ptr_types. This could have been done with a MAYBE_MEM_ALLOC equivalent to MAYBE_NULL, but bpf_kptr_xchg is the only helper that I can foresee using MAYBE_MEM_ALLOC, so keep it special-cased for now. The verifier tags bpf_kptr_xchg retval MEM_ALLOC if and only if the BTF associated with the retval is not kernel BTF.
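A condensed, editorial sketch of the flow this enables (the full selftest is added two patches later; the struct and map names below are illustrative only, and the program assumes vmlinux.h, bpf_helpers.h and the bpf_obj_new/bpf_obj_drop/bpf_kptr_xchg wrappers from the selftests' bpf_experimental.h):

/* Sketch only: stash a bpf_obj_new()-allocated "local kptr" in a map value */
struct node { long key; };

struct map_val { struct node __kptr *n; };

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, int);
	__type(value, struct map_val);
	__uint(max_entries, 1);
} stash SEC(".maps");

SEC("tc")
long stash_local_kptr(void *ctx)
{
	struct map_val *v;
	struct node *n, *old;
	int key = 0;

	v = bpf_map_lookup_elem(&stash, &key);
	if (!v)
		return 0;

	n = bpf_obj_new(typeof(*n));	/* returns a MEM_ALLOC "local kptr" */
	if (!n)
		return 0;

	old = bpf_kptr_xchg(&v->n, n);	/* MEM_ALLOC argument now accepted */
	if (old)
		bpf_obj_drop(old);	/* retval is MEM_ALLOC too and must be released */
	return 0;
}

Without the retval being tagged MEM_ALLOC, the bpf_obj_drop(old) call above would be rejected by the verifier.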
Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20230310230743.2320707-3-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 073c56dbec63e..519d465407ef6 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7123,6 +7123,9 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno, if (arg_type & PTR_MAYBE_NULL) type &= ~PTR_MAYBE_NULL; + if (meta->func_id == BPF_FUNC_kptr_xchg && type & MEM_ALLOC) + type &= ~MEM_ALLOC; + for (i = 0; i < ARRAY_SIZE(compatible->types); i++) { expected = compatible->types[i]; if (expected == NOT_INIT) @@ -7185,7 +7188,8 @@ found: break; } case PTR_TO_BTF_ID | MEM_ALLOC: - if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock) { + if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock && + meta->func_id != BPF_FUNC_kptr_xchg) { verbose(env, "verifier internal error: unimplemented handling of MEM_ALLOC\n"); return -EFAULT; } @@ -9151,6 +9155,8 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn if (func_id == BPF_FUNC_kptr_xchg) { ret_btf = meta.kptr_field->kptr.btf; ret_btf_id = meta.kptr_field->kptr.btf_id; + if (!btf_is_kernel(ret_btf)) + regs[BPF_REG_0].type |= MEM_ALLOC; } else { if (fn->ret_btf_id == BPF_PTR_POISON) { verbose(env, "verifier internal error:"); -- GitLab From 5d8d6634cccf1ebd0db4e220e52e7128b030c7b4 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Fri, 10 Mar 2023 15:07:43 -0800 Subject: [PATCH 0342/2078] selftests/bpf: Add local kptr stashing test Add a new selftest, local_kptr_stash, which uses bpf_kptr_xchg to stash a bpf_obj_new-allocated object in a map. Test the following scenarios: * Stash two rb_nodes in an arraymap, don't unstash them, rely on map free to destruct them * Stash two rb_nodes in an arraymap, unstash the second one in a separate program, rely on map free to destruct first Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20230310230743.2320707-4-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- .../bpf/prog_tests/local_kptr_stash.c | 60 ++++++++++ .../selftests/bpf/progs/local_kptr_stash.c | 108 ++++++++++++++++++ 2 files changed, 168 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c create mode 100644 tools/testing/selftests/bpf/progs/local_kptr_stash.c diff --git a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c new file mode 100644 index 0000000000000..76f1da877f818 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
*/ + +#include +#include + +#include "local_kptr_stash.skel.h" +static void test_local_kptr_stash_simple(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct local_kptr_stash *skel; + int ret; + + skel = local_kptr_stash__open_and_load(); + if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_rb_nodes), &opts); + ASSERT_OK(ret, "local_kptr_stash_add_nodes run"); + ASSERT_OK(opts.retval, "local_kptr_stash_add_nodes retval"); + + local_kptr_stash__destroy(skel); +} + +static void test_local_kptr_stash_unstash(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct local_kptr_stash *skel; + int ret; + + skel = local_kptr_stash__open_and_load(); + if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_rb_nodes), &opts); + ASSERT_OK(ret, "local_kptr_stash_add_nodes run"); + ASSERT_OK(opts.retval, "local_kptr_stash_add_nodes retval"); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.unstash_rb_node), &opts); + ASSERT_OK(ret, "local_kptr_stash_add_nodes run"); + ASSERT_EQ(opts.retval, 42, "local_kptr_stash_add_nodes retval"); + + local_kptr_stash__destroy(skel); +} + +void test_local_kptr_stash_success(void) +{ + if (test__start_subtest("local_kptr_stash_simple")) + test_local_kptr_stash_simple(); + if (test__start_subtest("local_kptr_stash_unstash")) + test_local_kptr_stash_unstash(); +} diff --git a/tools/testing/selftests/bpf/progs/local_kptr_stash.c b/tools/testing/selftests/bpf/progs/local_kptr_stash.c new file mode 100644 index 0000000000000..0ef286da092bb --- /dev/null +++ b/tools/testing/selftests/bpf/progs/local_kptr_stash.c @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include +#include +#include "bpf_experimental.h" + +struct node_data { + long key; + long data; + struct bpf_rb_node node; +}; + +struct map_value { + struct prog_test_ref_kfunc *not_kptr; + struct prog_test_ref_kfunc __kptr *val; + struct node_data __kptr *node; +}; + +/* This is necessary so that LLVM generates BTF for node_data struct + * If it's not included, a fwd reference for node_data will be generated but + * no struct. 
Example BTF of "node" field in map_value when not included: + * + * [10] PTR '(anon)' type_id=35 + * [34] FWD 'node_data' fwd_kind=struct + * [35] TYPE_TAG 'kptr_ref' type_id=34 + * + * (with no node_data struct defined) + * Had to do the same w/ bpf_kfunc_call_test_release below + */ +struct node_data *just_here_because_btf_bug; + +extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 2); +} some_nodes SEC(".maps"); + +static int create_and_stash(int idx, int val) +{ + struct map_value *mapval; + struct node_data *res; + + mapval = bpf_map_lookup_elem(&some_nodes, &idx); + if (!mapval) + return 1; + + res = bpf_obj_new(typeof(*res)); + if (!res) + return 1; + res->key = val; + + res = bpf_kptr_xchg(&mapval->node, res); + if (res) + bpf_obj_drop(res); + return 0; +} + +SEC("tc") +long stash_rb_nodes(void *ctx) +{ + return create_and_stash(0, 41) ?: create_and_stash(1, 42); +} + +SEC("tc") +long unstash_rb_node(void *ctx) +{ + struct map_value *mapval; + struct node_data *res; + long retval; + int key = 1; + + mapval = bpf_map_lookup_elem(&some_nodes, &key); + if (!mapval) + return 1; + + res = bpf_kptr_xchg(&mapval->node, NULL); + if (res) { + retval = res->key; + bpf_obj_drop(res); + return retval; + } + return 1; +} + +SEC("tc") +long stash_test_ref_kfunc(void *ctx) +{ + struct prog_test_ref_kfunc *res; + struct map_value *mapval; + int key = 0; + + mapval = bpf_map_lookup_elem(&some_nodes, &key); + if (!mapval) + return 1; + + res = bpf_kptr_xchg(&mapval->val, NULL); + if (res) + bpf_kfunc_call_test_release(res); + return 0; +} + +char _license[] SEC("license") = "GPL"; -- GitLab From 99ce286d2d30a31eba4171036bc3f32eeb59e5f3 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 7 Mar 2023 23:09:25 +0100 Subject: [PATCH 0343/2078] net: lan966x: Add IS1 VCAP model Provide IS1 (ingress stage 1) VCAP model for lan966x. This provides classification actions for lan966x. 
Signed-off-by: Horatiu Vultur Signed-off-by: Jakub Kicinski --- .../microchip/lan966x/lan966x_vcap_ag_api.c | 1402 ++++++++++++++++- .../net/ethernet/microchip/vcap/vcap_ag_api.h | 217 ++- .../microchip/vcap/vcap_api_debugfs_kunit.c | 4 +- 3 files changed, 1563 insertions(+), 60 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_ag_api.c b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_ag_api.c index 928e711960e6b..66400a082d029 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_ag_api.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_ag_api.c @@ -6,6 +6,965 @@ #include "lan966x_vcap_ag_api.h" /* keyfields */ +static const struct vcap_field is1_normal_keyfield[] = { + [VCAP_KF_TYPE] = { + .type = VCAP_FIELD_BIT, + .offset = 0, + .width = 1, + }, + [VCAP_KF_LOOKUP_INDEX] = { + .type = VCAP_FIELD_U32, + .offset = 1, + .width = 2, + }, + [VCAP_KF_IF_IGR_PORT_MASK] = { + .type = VCAP_FIELD_U32, + .offset = 3, + .width = 9, + }, + [VCAP_KF_L2_MC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 12, + .width = 1, + }, + [VCAP_KF_L2_BC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 13, + .width = 1, + }, + [VCAP_KF_IP_MC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 14, + .width = 1, + }, + [VCAP_KF_8021CB_R_TAGGED_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 15, + .width = 1, + }, + [VCAP_KF_8021Q_VLAN_TAGGED_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 16, + .width = 1, + }, + [VCAP_KF_8021Q_VLAN_DBL_TAGGED_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 17, + .width = 1, + }, + [VCAP_KF_8021Q_TPID0] = { + .type = VCAP_FIELD_BIT, + .offset = 18, + .width = 1, + }, + [VCAP_KF_8021Q_VID0] = { + .type = VCAP_FIELD_U32, + .offset = 19, + .width = 12, + }, + [VCAP_KF_8021Q_DEI0] = { + .type = VCAP_FIELD_BIT, + .offset = 31, + .width = 1, + }, + [VCAP_KF_8021Q_PCP0] = { + .type = VCAP_FIELD_U32, + .offset = 32, + .width = 3, + }, + [VCAP_KF_L2_SMAC] = { + .type = VCAP_FIELD_U48, + .offset = 35, + .width = 48, + }, + [VCAP_KF_ETYPE_LEN_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 83, + .width = 1, + }, + [VCAP_KF_ETYPE] = { + .type = VCAP_FIELD_U32, + .offset = 84, + .width = 16, + }, + [VCAP_KF_IP_SNAP_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 100, + .width = 1, + }, + [VCAP_KF_IP4_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 101, + .width = 1, + }, + [VCAP_KF_L3_FRAGMENT] = { + .type = VCAP_FIELD_BIT, + .offset = 102, + .width = 1, + }, + [VCAP_KF_L3_FRAG_OFS_GT0] = { + .type = VCAP_FIELD_BIT, + .offset = 103, + .width = 1, + }, + [VCAP_KF_L3_OPTIONS_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 104, + .width = 1, + }, + [VCAP_KF_L3_DSCP] = { + .type = VCAP_FIELD_U32, + .offset = 105, + .width = 6, + }, + [VCAP_KF_L3_IP4_SIP] = { + .type = VCAP_FIELD_U32, + .offset = 111, + .width = 32, + }, + [VCAP_KF_TCP_UDP_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 143, + .width = 1, + }, + [VCAP_KF_TCP_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 144, + .width = 1, + }, + [VCAP_KF_L4_SPORT] = { + .type = VCAP_FIELD_U32, + .offset = 145, + .width = 16, + }, + [VCAP_KF_L4_RNG] = { + .type = VCAP_FIELD_U32, + .offset = 161, + .width = 8, + }, +}; + +static const struct vcap_field is1_5tuple_ip4_keyfield[] = { + [VCAP_KF_TYPE] = { + .type = VCAP_FIELD_BIT, + .offset = 0, + .width = 1, + }, + [VCAP_KF_LOOKUP_INDEX] = { + .type = VCAP_FIELD_U32, + .offset = 1, + .width = 2, + }, + [VCAP_KF_IF_IGR_PORT_MASK] = { + .type = VCAP_FIELD_U32, + .offset = 3, + .width = 9, + }, + [VCAP_KF_L2_MC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 12, + .width = 1, + }, + 
[VCAP_KF_L2_BC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 13, + .width = 1, + }, + [VCAP_KF_IP_MC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 14, + .width = 1, + }, + [VCAP_KF_8021CB_R_TAGGED_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 15, + .width = 1, + }, + [VCAP_KF_8021Q_VLAN_TAGGED_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 16, + .width = 1, + }, + [VCAP_KF_8021Q_VLAN_DBL_TAGGED_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 17, + .width = 1, + }, + [VCAP_KF_8021Q_TPID0] = { + .type = VCAP_FIELD_BIT, + .offset = 18, + .width = 1, + }, + [VCAP_KF_8021Q_VID0] = { + .type = VCAP_FIELD_U32, + .offset = 19, + .width = 12, + }, + [VCAP_KF_8021Q_DEI0] = { + .type = VCAP_FIELD_BIT, + .offset = 31, + .width = 1, + }, + [VCAP_KF_8021Q_PCP0] = { + .type = VCAP_FIELD_U32, + .offset = 32, + .width = 3, + }, + [VCAP_KF_8021Q_TPID1] = { + .type = VCAP_FIELD_BIT, + .offset = 35, + .width = 1, + }, + [VCAP_KF_8021Q_VID1] = { + .type = VCAP_FIELD_U32, + .offset = 36, + .width = 12, + }, + [VCAP_KF_8021Q_DEI1] = { + .type = VCAP_FIELD_BIT, + .offset = 48, + .width = 1, + }, + [VCAP_KF_8021Q_PCP1] = { + .type = VCAP_FIELD_U32, + .offset = 49, + .width = 3, + }, + [VCAP_KF_IP4_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 52, + .width = 1, + }, + [VCAP_KF_L3_FRAGMENT] = { + .type = VCAP_FIELD_BIT, + .offset = 53, + .width = 1, + }, + [VCAP_KF_L3_FRAG_OFS_GT0] = { + .type = VCAP_FIELD_BIT, + .offset = 54, + .width = 1, + }, + [VCAP_KF_L3_OPTIONS_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 55, + .width = 1, + }, + [VCAP_KF_L3_DSCP] = { + .type = VCAP_FIELD_U32, + .offset = 56, + .width = 6, + }, + [VCAP_KF_L3_IP4_DIP] = { + .type = VCAP_FIELD_U32, + .offset = 62, + .width = 32, + }, + [VCAP_KF_L3_IP4_SIP] = { + .type = VCAP_FIELD_U32, + .offset = 94, + .width = 32, + }, + [VCAP_KF_L3_IP_PROTO] = { + .type = VCAP_FIELD_U32, + .offset = 126, + .width = 8, + }, + [VCAP_KF_TCP_UDP_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 134, + .width = 1, + }, + [VCAP_KF_TCP_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 135, + .width = 1, + }, + [VCAP_KF_L4_RNG] = { + .type = VCAP_FIELD_U32, + .offset = 136, + .width = 8, + }, + [VCAP_KF_IP_PAYLOAD_5TUPLE] = { + .type = VCAP_FIELD_U32, + .offset = 144, + .width = 32, + }, +}; + +static const struct vcap_field is1_normal_ip6_keyfield[] = { + [VCAP_KF_TYPE] = { + .type = VCAP_FIELD_U32, + .offset = 0, + .width = 2, + }, + [VCAP_KF_LOOKUP_INDEX] = { + .type = VCAP_FIELD_U32, + .offset = 2, + .width = 2, + }, + [VCAP_KF_IF_IGR_PORT_MASK] = { + .type = VCAP_FIELD_U32, + .offset = 4, + .width = 9, + }, + [VCAP_KF_L2_MC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 13, + .width = 1, + }, + [VCAP_KF_L2_BC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 14, + .width = 1, + }, + [VCAP_KF_IP_MC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 15, + .width = 1, + }, + [VCAP_KF_8021CB_R_TAGGED_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 16, + .width = 1, + }, + [VCAP_KF_8021Q_VLAN_TAGGED_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 17, + .width = 1, + }, + [VCAP_KF_8021Q_VLAN_DBL_TAGGED_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 18, + .width = 1, + }, + [VCAP_KF_8021Q_TPID0] = { + .type = VCAP_FIELD_BIT, + .offset = 19, + .width = 1, + }, + [VCAP_KF_8021Q_VID0] = { + .type = VCAP_FIELD_U32, + .offset = 20, + .width = 12, + }, + [VCAP_KF_8021Q_DEI0] = { + .type = VCAP_FIELD_BIT, + .offset = 32, + .width = 1, + }, + [VCAP_KF_8021Q_PCP0] = { + .type = VCAP_FIELD_U32, + .offset = 33, + .width = 3, + }, + [VCAP_KF_8021Q_TPID1] = { + .type = VCAP_FIELD_BIT, + .offset = 36, + 
.width = 1, + }, + [VCAP_KF_8021Q_VID1] = { + .type = VCAP_FIELD_U32, + .offset = 37, + .width = 12, + }, + [VCAP_KF_8021Q_DEI1] = { + .type = VCAP_FIELD_BIT, + .offset = 49, + .width = 1, + }, + [VCAP_KF_8021Q_PCP1] = { + .type = VCAP_FIELD_U32, + .offset = 50, + .width = 3, + }, + [VCAP_KF_L2_SMAC] = { + .type = VCAP_FIELD_U48, + .offset = 53, + .width = 48, + }, + [VCAP_KF_L3_DSCP] = { + .type = VCAP_FIELD_U32, + .offset = 101, + .width = 6, + }, + [VCAP_KF_L3_IP6_SIP] = { + .type = VCAP_FIELD_U128, + .offset = 107, + .width = 128, + }, + [VCAP_KF_L3_IP_PROTO] = { + .type = VCAP_FIELD_U32, + .offset = 235, + .width = 8, + }, + [VCAP_KF_TCP_UDP_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 243, + .width = 1, + }, + [VCAP_KF_L4_RNG] = { + .type = VCAP_FIELD_U32, + .offset = 244, + .width = 8, + }, + [VCAP_KF_IP_PAYLOAD_S1_IP6] = { + .type = VCAP_FIELD_U112, + .offset = 252, + .width = 112, + }, +}; + +static const struct vcap_field is1_7tuple_keyfield[] = { + [VCAP_KF_TYPE] = { + .type = VCAP_FIELD_U32, + .offset = 0, + .width = 2, + }, + [VCAP_KF_LOOKUP_INDEX] = { + .type = VCAP_FIELD_U32, + .offset = 2, + .width = 2, + }, + [VCAP_KF_IF_IGR_PORT_MASK] = { + .type = VCAP_FIELD_U32, + .offset = 4, + .width = 9, + }, + [VCAP_KF_L2_MC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 13, + .width = 1, + }, + [VCAP_KF_L2_BC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 14, + .width = 1, + }, + [VCAP_KF_IP_MC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 15, + .width = 1, + }, + [VCAP_KF_8021CB_R_TAGGED_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 16, + .width = 1, + }, + [VCAP_KF_8021Q_VLAN_TAGGED_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 17, + .width = 1, + }, + [VCAP_KF_8021Q_VLAN_DBL_TAGGED_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 18, + .width = 1, + }, + [VCAP_KF_8021Q_TPID0] = { + .type = VCAP_FIELD_BIT, + .offset = 19, + .width = 1, + }, + [VCAP_KF_8021Q_VID0] = { + .type = VCAP_FIELD_U32, + .offset = 20, + .width = 12, + }, + [VCAP_KF_8021Q_DEI0] = { + .type = VCAP_FIELD_BIT, + .offset = 32, + .width = 1, + }, + [VCAP_KF_8021Q_PCP0] = { + .type = VCAP_FIELD_U32, + .offset = 33, + .width = 3, + }, + [VCAP_KF_8021Q_TPID1] = { + .type = VCAP_FIELD_BIT, + .offset = 36, + .width = 1, + }, + [VCAP_KF_8021Q_VID1] = { + .type = VCAP_FIELD_U32, + .offset = 37, + .width = 12, + }, + [VCAP_KF_8021Q_DEI1] = { + .type = VCAP_FIELD_BIT, + .offset = 49, + .width = 1, + }, + [VCAP_KF_8021Q_PCP1] = { + .type = VCAP_FIELD_U32, + .offset = 50, + .width = 3, + }, + [VCAP_KF_L2_DMAC] = { + .type = VCAP_FIELD_U48, + .offset = 53, + .width = 48, + }, + [VCAP_KF_L2_SMAC] = { + .type = VCAP_FIELD_U48, + .offset = 101, + .width = 48, + }, + [VCAP_KF_ETYPE_LEN_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 149, + .width = 1, + }, + [VCAP_KF_ETYPE] = { + .type = VCAP_FIELD_U32, + .offset = 150, + .width = 16, + }, + [VCAP_KF_IP_SNAP_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 166, + .width = 1, + }, + [VCAP_KF_IP4_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 167, + .width = 1, + }, + [VCAP_KF_L3_FRAGMENT] = { + .type = VCAP_FIELD_BIT, + .offset = 168, + .width = 1, + }, + [VCAP_KF_L3_FRAG_OFS_GT0] = { + .type = VCAP_FIELD_BIT, + .offset = 169, + .width = 1, + }, + [VCAP_KF_L3_OPTIONS_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 170, + .width = 1, + }, + [VCAP_KF_L3_DSCP] = { + .type = VCAP_FIELD_U32, + .offset = 171, + .width = 6, + }, + [VCAP_KF_L3_IP6_DIP_MSB] = { + .type = VCAP_FIELD_U32, + .offset = 177, + .width = 16, + }, + [VCAP_KF_L3_IP6_DIP] = { + .type = VCAP_FIELD_U64, + .offset = 193, + 
.width = 64, + }, + [VCAP_KF_L3_IP6_SIP_MSB] = { + .type = VCAP_FIELD_U32, + .offset = 257, + .width = 16, + }, + [VCAP_KF_L3_IP6_SIP] = { + .type = VCAP_FIELD_U64, + .offset = 273, + .width = 64, + }, + [VCAP_KF_TCP_UDP_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 337, + .width = 1, + }, + [VCAP_KF_TCP_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 338, + .width = 1, + }, + [VCAP_KF_L4_SPORT] = { + .type = VCAP_FIELD_U32, + .offset = 339, + .width = 16, + }, + [VCAP_KF_L4_RNG] = { + .type = VCAP_FIELD_U32, + .offset = 355, + .width = 8, + }, +}; + +static const struct vcap_field is1_5tuple_ip6_keyfield[] = { + [VCAP_KF_TYPE] = { + .type = VCAP_FIELD_U32, + .offset = 0, + .width = 2, + }, + [VCAP_KF_LOOKUP_INDEX] = { + .type = VCAP_FIELD_U32, + .offset = 2, + .width = 2, + }, + [VCAP_KF_IF_IGR_PORT_MASK] = { + .type = VCAP_FIELD_U32, + .offset = 4, + .width = 9, + }, + [VCAP_KF_L2_MC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 13, + .width = 1, + }, + [VCAP_KF_L2_BC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 14, + .width = 1, + }, + [VCAP_KF_IP_MC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 15, + .width = 1, + }, + [VCAP_KF_8021CB_R_TAGGED_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 16, + .width = 1, + }, + [VCAP_KF_8021Q_VLAN_TAGGED_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 17, + .width = 1, + }, + [VCAP_KF_8021Q_VLAN_DBL_TAGGED_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 18, + .width = 1, + }, + [VCAP_KF_8021Q_TPID0] = { + .type = VCAP_FIELD_BIT, + .offset = 19, + .width = 1, + }, + [VCAP_KF_8021Q_VID0] = { + .type = VCAP_FIELD_U32, + .offset = 20, + .width = 12, + }, + [VCAP_KF_8021Q_DEI0] = { + .type = VCAP_FIELD_BIT, + .offset = 32, + .width = 1, + }, + [VCAP_KF_8021Q_PCP0] = { + .type = VCAP_FIELD_U32, + .offset = 33, + .width = 3, + }, + [VCAP_KF_8021Q_TPID1] = { + .type = VCAP_FIELD_BIT, + .offset = 36, + .width = 1, + }, + [VCAP_KF_8021Q_VID1] = { + .type = VCAP_FIELD_U32, + .offset = 37, + .width = 12, + }, + [VCAP_KF_8021Q_DEI1] = { + .type = VCAP_FIELD_BIT, + .offset = 49, + .width = 1, + }, + [VCAP_KF_8021Q_PCP1] = { + .type = VCAP_FIELD_U32, + .offset = 50, + .width = 3, + }, + [VCAP_KF_L3_DSCP] = { + .type = VCAP_FIELD_U32, + .offset = 53, + .width = 6, + }, + [VCAP_KF_L3_IP6_DIP] = { + .type = VCAP_FIELD_U128, + .offset = 59, + .width = 128, + }, + [VCAP_KF_L3_IP6_SIP] = { + .type = VCAP_FIELD_U128, + .offset = 187, + .width = 128, + }, + [VCAP_KF_L3_IP_PROTO] = { + .type = VCAP_FIELD_U32, + .offset = 315, + .width = 8, + }, + [VCAP_KF_TCP_UDP_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 323, + .width = 1, + }, + [VCAP_KF_L4_RNG] = { + .type = VCAP_FIELD_U32, + .offset = 324, + .width = 8, + }, + [VCAP_KF_IP_PAYLOAD_5TUPLE] = { + .type = VCAP_FIELD_U32, + .offset = 332, + .width = 32, + }, +}; + +static const struct vcap_field is1_dbl_vid_keyfield[] = { + [VCAP_KF_TYPE] = { + .type = VCAP_FIELD_U32, + .offset = 0, + .width = 2, + }, + [VCAP_KF_LOOKUP_INDEX] = { + .type = VCAP_FIELD_U32, + .offset = 2, + .width = 2, + }, + [VCAP_KF_IF_IGR_PORT_MASK] = { + .type = VCAP_FIELD_U32, + .offset = 4, + .width = 9, + }, + [VCAP_KF_L2_MC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 13, + .width = 1, + }, + [VCAP_KF_L2_BC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 14, + .width = 1, + }, + [VCAP_KF_IP_MC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 15, + .width = 1, + }, + [VCAP_KF_8021CB_R_TAGGED_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 16, + .width = 1, + }, + [VCAP_KF_8021Q_VLAN_TAGGED_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 17, + .width = 1, + }, + 
[VCAP_KF_8021Q_VLAN_DBL_TAGGED_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 18, + .width = 1, + }, + [VCAP_KF_8021Q_TPID0] = { + .type = VCAP_FIELD_BIT, + .offset = 19, + .width = 1, + }, + [VCAP_KF_8021Q_VID0] = { + .type = VCAP_FIELD_U32, + .offset = 20, + .width = 12, + }, + [VCAP_KF_8021Q_DEI0] = { + .type = VCAP_FIELD_BIT, + .offset = 32, + .width = 1, + }, + [VCAP_KF_8021Q_PCP0] = { + .type = VCAP_FIELD_U32, + .offset = 33, + .width = 3, + }, + [VCAP_KF_8021Q_TPID1] = { + .type = VCAP_FIELD_BIT, + .offset = 36, + .width = 1, + }, + [VCAP_KF_8021Q_VID1] = { + .type = VCAP_FIELD_U32, + .offset = 37, + .width = 12, + }, + [VCAP_KF_8021Q_DEI1] = { + .type = VCAP_FIELD_BIT, + .offset = 49, + .width = 1, + }, + [VCAP_KF_8021Q_PCP1] = { + .type = VCAP_FIELD_U32, + .offset = 50, + .width = 3, + }, + [VCAP_KF_ETYPE_LEN_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 53, + .width = 1, + }, + [VCAP_KF_ETYPE] = { + .type = VCAP_FIELD_U32, + .offset = 54, + .width = 16, + }, + [VCAP_KF_IP_SNAP_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 70, + .width = 1, + }, + [VCAP_KF_IP4_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 71, + .width = 1, + }, + [VCAP_KF_L3_FRAGMENT] = { + .type = VCAP_FIELD_BIT, + .offset = 72, + .width = 1, + }, + [VCAP_KF_L3_FRAG_OFS_GT0] = { + .type = VCAP_FIELD_BIT, + .offset = 73, + .width = 1, + }, + [VCAP_KF_L3_OPTIONS_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 74, + .width = 1, + }, + [VCAP_KF_L3_DSCP] = { + .type = VCAP_FIELD_U32, + .offset = 75, + .width = 6, + }, + [VCAP_KF_TCP_UDP_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 81, + .width = 1, + }, + [VCAP_KF_TCP_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 82, + .width = 1, + }, +}; + +static const struct vcap_field is1_rt_keyfield[] = { + [VCAP_KF_TYPE] = { + .type = VCAP_FIELD_U32, + .offset = 0, + .width = 2, + }, + [VCAP_KF_LOOKUP_FIRST_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 2, + .width = 1, + }, + [VCAP_KF_IF_IGR_PORT] = { + .type = VCAP_FIELD_U32, + .offset = 3, + .width = 3, + }, + [VCAP_KF_8021CB_R_TAGGED_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 6, + .width = 1, + }, + [VCAP_KF_8021Q_VLAN_TAGGED_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 7, + .width = 1, + }, + [VCAP_KF_8021Q_VLAN_DBL_TAGGED_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 8, + .width = 1, + }, + [VCAP_KF_L2_MAC] = { + .type = VCAP_FIELD_U48, + .offset = 9, + .width = 48, + }, + [VCAP_KF_RT_VLAN_IDX] = { + .type = VCAP_FIELD_U32, + .offset = 57, + .width = 3, + }, + [VCAP_KF_RT_TYPE] = { + .type = VCAP_FIELD_U32, + .offset = 60, + .width = 2, + }, + [VCAP_KF_RT_FRMID] = { + .type = VCAP_FIELD_U32, + .offset = 62, + .width = 32, + }, +}; + +static const struct vcap_field is1_dmac_vid_keyfield[] = { + [VCAP_KF_TYPE] = { + .type = VCAP_FIELD_U32, + .offset = 0, + .width = 2, + }, + [VCAP_KF_LOOKUP_INDEX] = { + .type = VCAP_FIELD_U32, + .offset = 2, + .width = 2, + }, + [VCAP_KF_IF_IGR_PORT_MASK] = { + .type = VCAP_FIELD_U32, + .offset = 4, + .width = 9, + }, + [VCAP_KF_8021CB_R_TAGGED_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 13, + .width = 1, + }, + [VCAP_KF_8021Q_VLAN_TAGGED_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 14, + .width = 1, + }, + [VCAP_KF_8021Q_VLAN_DBL_TAGGED_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 15, + .width = 1, + }, + [VCAP_KF_8021Q_TPID0] = { + .type = VCAP_FIELD_BIT, + .offset = 16, + .width = 1, + }, + [VCAP_KF_8021Q_VID0] = { + .type = VCAP_FIELD_U32, + .offset = 17, + .width = 12, + }, + [VCAP_KF_8021Q_DEI0] = { + .type = VCAP_FIELD_BIT, + .offset = 29, + .width = 1, + }, + 
[VCAP_KF_8021Q_PCP0] = { + .type = VCAP_FIELD_U32, + .offset = 30, + .width = 3, + }, + [VCAP_KF_L2_DMAC] = { + .type = VCAP_FIELD_U48, + .offset = 33, + .width = 48, + }, +}; + static const struct vcap_field is2_mac_etype_keyfield[] = { [VCAP_KF_TYPE] = { .type = VCAP_FIELD_U32, @@ -1163,6 +2122,49 @@ static const struct vcap_field is2_smac_sip6_keyfield[] = { }; /* keyfield_set */ +static const struct vcap_set is1_keyfield_set[] = { + [VCAP_KFS_NORMAL] = { + .type_id = 0, + .sw_per_item = 2, + .sw_cnt = 2, + }, + [VCAP_KFS_5TUPLE_IP4] = { + .type_id = 1, + .sw_per_item = 2, + .sw_cnt = 2, + }, + [VCAP_KFS_NORMAL_IP6] = { + .type_id = 0, + .sw_per_item = 4, + .sw_cnt = 1, + }, + [VCAP_KFS_7TUPLE] = { + .type_id = 1, + .sw_per_item = 4, + .sw_cnt = 1, + }, + [VCAP_KFS_5TUPLE_IP6] = { + .type_id = 2, + .sw_per_item = 4, + .sw_cnt = 1, + }, + [VCAP_KFS_DBL_VID] = { + .type_id = 0, + .sw_per_item = 1, + .sw_cnt = 4, + }, + [VCAP_KFS_RT] = { + .type_id = 1, + .sw_per_item = 1, + .sw_cnt = 4, + }, + [VCAP_KFS_DMAC_VID] = { + .type_id = 2, + .sw_per_item = 1, + .sw_cnt = 4, + }, +}; + static const struct vcap_set is2_keyfield_set[] = { [VCAP_KFS_MAC_ETYPE] = { .type_id = 0, @@ -1227,6 +2229,17 @@ static const struct vcap_set is2_keyfield_set[] = { }; /* keyfield_set map */ +static const struct vcap_field *is1_keyfield_set_map[] = { + [VCAP_KFS_NORMAL] = is1_normal_keyfield, + [VCAP_KFS_5TUPLE_IP4] = is1_5tuple_ip4_keyfield, + [VCAP_KFS_NORMAL_IP6] = is1_normal_ip6_keyfield, + [VCAP_KFS_7TUPLE] = is1_7tuple_keyfield, + [VCAP_KFS_5TUPLE_IP6] = is1_5tuple_ip6_keyfield, + [VCAP_KFS_DBL_VID] = is1_dbl_vid_keyfield, + [VCAP_KFS_RT] = is1_rt_keyfield, + [VCAP_KFS_DMAC_VID] = is1_dmac_vid_keyfield, +}; + static const struct vcap_field *is2_keyfield_set_map[] = { [VCAP_KFS_MAC_ETYPE] = is2_mac_etype_keyfield, [VCAP_KFS_MAC_LLC] = is2_mac_llc_keyfield, @@ -1243,6 +2256,17 @@ static const struct vcap_field *is2_keyfield_set_map[] = { }; /* keyfield_set map sizes */ +static int is1_keyfield_set_map_size[] = { + [VCAP_KFS_NORMAL] = ARRAY_SIZE(is1_normal_keyfield), + [VCAP_KFS_5TUPLE_IP4] = ARRAY_SIZE(is1_5tuple_ip4_keyfield), + [VCAP_KFS_NORMAL_IP6] = ARRAY_SIZE(is1_normal_ip6_keyfield), + [VCAP_KFS_7TUPLE] = ARRAY_SIZE(is1_7tuple_keyfield), + [VCAP_KFS_5TUPLE_IP6] = ARRAY_SIZE(is1_5tuple_ip6_keyfield), + [VCAP_KFS_DBL_VID] = ARRAY_SIZE(is1_dbl_vid_keyfield), + [VCAP_KFS_RT] = ARRAY_SIZE(is1_rt_keyfield), + [VCAP_KFS_DMAC_VID] = ARRAY_SIZE(is1_dmac_vid_keyfield), +}; + static int is2_keyfield_set_map_size[] = { [VCAP_KFS_MAC_ETYPE] = ARRAY_SIZE(is2_mac_etype_keyfield), [VCAP_KFS_MAC_LLC] = ARRAY_SIZE(is2_mac_llc_keyfield), @@ -1259,6 +2283,154 @@ static int is2_keyfield_set_map_size[] = { }; /* actionfields */ +static const struct vcap_field is1_s1_actionfield[] = { + [VCAP_AF_TYPE] = { + .type = VCAP_FIELD_BIT, + .offset = 0, + .width = 1, + }, + [VCAP_AF_DSCP_ENA] = { + .type = VCAP_FIELD_BIT, + .offset = 1, + .width = 1, + }, + [VCAP_AF_DSCP_VAL] = { + .type = VCAP_FIELD_U32, + .offset = 2, + .width = 6, + }, + [VCAP_AF_QOS_ENA] = { + .type = VCAP_FIELD_BIT, + .offset = 8, + .width = 1, + }, + [VCAP_AF_QOS_VAL] = { + .type = VCAP_FIELD_U32, + .offset = 9, + .width = 3, + }, + [VCAP_AF_DP_ENA] = { + .type = VCAP_FIELD_BIT, + .offset = 12, + .width = 1, + }, + [VCAP_AF_DP_VAL] = { + .type = VCAP_FIELD_BIT, + .offset = 13, + .width = 1, + }, + [VCAP_AF_PAG_OVERRIDE_MASK] = { + .type = VCAP_FIELD_U32, + .offset = 14, + .width = 8, + }, + [VCAP_AF_PAG_VAL] = { + .type = VCAP_FIELD_U32, + .offset = 22, + .width 
= 8, + }, + [VCAP_AF_ISDX_REPLACE_ENA] = { + .type = VCAP_FIELD_BIT, + .offset = 30, + .width = 1, + }, + [VCAP_AF_ISDX_ADD_VAL] = { + .type = VCAP_FIELD_U32, + .offset = 31, + .width = 8, + }, + [VCAP_AF_VID_REPLACE_ENA] = { + .type = VCAP_FIELD_BIT, + .offset = 39, + .width = 1, + }, + [VCAP_AF_VID_VAL] = { + .type = VCAP_FIELD_U32, + .offset = 40, + .width = 12, + }, + [VCAP_AF_PCP_ENA] = { + .type = VCAP_FIELD_BIT, + .offset = 67, + .width = 1, + }, + [VCAP_AF_PCP_VAL] = { + .type = VCAP_FIELD_U32, + .offset = 68, + .width = 3, + }, + [VCAP_AF_DEI_ENA] = { + .type = VCAP_FIELD_BIT, + .offset = 71, + .width = 1, + }, + [VCAP_AF_DEI_VAL] = { + .type = VCAP_FIELD_BIT, + .offset = 72, + .width = 1, + }, + [VCAP_AF_VLAN_POP_CNT_ENA] = { + .type = VCAP_FIELD_BIT, + .offset = 73, + .width = 1, + }, + [VCAP_AF_VLAN_POP_CNT] = { + .type = VCAP_FIELD_U32, + .offset = 74, + .width = 2, + }, + [VCAP_AF_CUSTOM_ACE_TYPE_ENA] = { + .type = VCAP_FIELD_U32, + .offset = 76, + .width = 4, + }, + [VCAP_AF_SFID_ENA] = { + .type = VCAP_FIELD_BIT, + .offset = 80, + .width = 1, + }, + [VCAP_AF_SFID_VAL] = { + .type = VCAP_FIELD_U32, + .offset = 81, + .width = 8, + }, + [VCAP_AF_SGID_ENA] = { + .type = VCAP_FIELD_BIT, + .offset = 89, + .width = 1, + }, + [VCAP_AF_SGID_VAL] = { + .type = VCAP_FIELD_U32, + .offset = 90, + .width = 8, + }, + [VCAP_AF_POLICE_ENA] = { + .type = VCAP_FIELD_BIT, + .offset = 98, + .width = 1, + }, + [VCAP_AF_POLICE_IDX] = { + .type = VCAP_FIELD_U32, + .offset = 99, + .width = 9, + }, + [VCAP_AF_OAM_SEL] = { + .type = VCAP_FIELD_U32, + .offset = 108, + .width = 3, + }, + [VCAP_AF_MRP_SEL] = { + .type = VCAP_FIELD_U32, + .offset = 111, + .width = 2, + }, + [VCAP_AF_DLR_SEL] = { + .type = VCAP_FIELD_U32, + .offset = 113, + .width = 2, + }, +}; + static const struct vcap_field is2_base_type_actionfield[] = { [VCAP_AF_HIT_ME_ONCE] = { .type = VCAP_FIELD_BIT, @@ -1351,6 +2523,14 @@ static const struct vcap_field is2_smac_sip_actionfield[] = { }; /* actionfield_set */ +static const struct vcap_set is1_actionfield_set[] = { + [VCAP_AFS_S1] = { + .type_id = 0, + .sw_per_item = 1, + .sw_cnt = 4, + }, +}; + static const struct vcap_set is2_actionfield_set[] = { [VCAP_AFS_BASE_TYPE] = { .type_id = -1, @@ -1365,18 +2545,73 @@ static const struct vcap_set is2_actionfield_set[] = { }; /* actionfield_set map */ +static const struct vcap_field *is1_actionfield_set_map[] = { + [VCAP_AFS_S1] = is1_s1_actionfield, +}; + static const struct vcap_field *is2_actionfield_set_map[] = { [VCAP_AFS_BASE_TYPE] = is2_base_type_actionfield, [VCAP_AFS_SMAC_SIP] = is2_smac_sip_actionfield, }; /* actionfield_set map size */ +static int is1_actionfield_set_map_size[] = { + [VCAP_AFS_S1] = ARRAY_SIZE(is1_s1_actionfield), +}; + static int is2_actionfield_set_map_size[] = { [VCAP_AFS_BASE_TYPE] = ARRAY_SIZE(is2_base_type_actionfield), [VCAP_AFS_SMAC_SIP] = ARRAY_SIZE(is2_smac_sip_actionfield), }; /* Type Groups */ +static const struct vcap_typegroup is1_x4_keyfield_set_typegroups[] = { + { + .offset = 0, + .width = 3, + .value = 4, + }, + { + .offset = 96, + .width = 1, + .value = 0, + }, + { + .offset = 192, + .width = 2, + .value = 0, + }, + { + .offset = 288, + .width = 1, + .value = 0, + }, + {} +}; + +static const struct vcap_typegroup is1_x2_keyfield_set_typegroups[] = { + { + .offset = 0, + .width = 2, + .value = 2, + }, + { + .offset = 96, + .width = 1, + .value = 0, + }, + {} +}; + +static const struct vcap_typegroup is1_x1_keyfield_set_typegroups[] = { + { + .offset = 0, + .width = 1, + .value = 1, + }, + {} +}; 
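
The IS1 tables above are easiest to read as a flat bit layout: each vcap_field entry gives the position and size of one key field inside a keyset's key stream, the vcap_set entries say how many subwords an entry occupies, and the typegroup entries inject fixed marker bits at subword boundaries. The standalone sketch below is illustrative only and is not part of the patch: the struct and helper names (demo_field, demo_set_bits) are made up for the example, and the offsets/widths are sample values rather than the authoritative lan966x ones; it simply shows how an {offset, width} table can be sanity-checked against the keyset width and used to place a field value into a key bitstream.

/* Illustrative sketch, assuming a 2-subword (2 x 96 bit) keyset layout. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct demo_field {
	const char *name;
	uint16_t offset;	/* bit position inside the key */
	uint16_t width;		/* field size in bits */
};

/* Example table loosely modelled on the IS1 X2 keysets; values are
 * examples, not the real lan966x field placement.
 */
static const struct demo_field demo_keyfields[] = {
	{ "TYPE",          0,  2 },
	{ "LOOKUP_INDEX",  2,  2 },
	{ "IGR_PORT_MASK", 4,  9 },
	{ "VID0",         20, 12 },
};

/* Write the low 'width' bits of 'val' into 'key' starting at bit 'offset'. */
static void demo_set_bits(uint8_t *key, uint16_t offset, uint16_t width,
			  uint32_t val)
{
	for (uint16_t i = 0; i < width; i++) {
		uint16_t bit = offset + i;

		if (val & (1u << i))
			key[bit / 8] |= 1u << (bit % 8);
		else
			key[bit / 8] &= ~(1u << (bit % 8));
	}
}

int main(void)
{
	const uint16_t key_bits = 2 * 96;	/* sw_per_item * sw_width */
	uint8_t key[(2 * 96 + 7) / 8];

	memset(key, 0, sizeof(key));

	/* Check that no field overruns the keyset, then pack one value. */
	for (size_t i = 0; i < sizeof(demo_keyfields) / sizeof(demo_keyfields[0]); i++) {
		const struct demo_field *f = &demo_keyfields[i];

		if (f->offset + f->width > key_bits) {
			fprintf(stderr, "%s overruns the %u-bit key\n",
				f->name, key_bits);
			return 1;
		}
	}

	demo_set_bits(key, demo_keyfields[3].offset, demo_keyfields[3].width, 100);
	printf("VID0 placed at bit %u, key byte 2 = 0x%02x\n",
	       demo_keyfields[3].offset, key[2]);
	return 0;
}
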
+ static const struct vcap_typegroup is2_x4_keyfield_set_typegroups[] = { { .offset = 0, @@ -1424,6 +2659,13 @@ static const struct vcap_typegroup is2_x1_keyfield_set_typegroups[] = { {} }; +static const struct vcap_typegroup *is1_keyfield_set_typegroups[] = { + [4] = is1_x4_keyfield_set_typegroups, + [2] = is1_x2_keyfield_set_typegroups, + [1] = is1_x1_keyfield_set_typegroups, + [5] = NULL, +}; + static const struct vcap_typegroup *is2_keyfield_set_typegroups[] = { [4] = is2_x4_keyfield_set_typegroups, [2] = is2_x2_keyfield_set_typegroups, @@ -1431,6 +2673,10 @@ static const struct vcap_typegroup *is2_keyfield_set_typegroups[] = { [5] = NULL, }; +static const struct vcap_typegroup is1_x1_actionfield_set_typegroups[] = { + {} +}; + static const struct vcap_typegroup is2_x2_actionfield_set_typegroups[] = { { .offset = 0, @@ -1454,6 +2700,11 @@ static const struct vcap_typegroup is2_x1_actionfield_set_typegroups[] = { {} }; +static const struct vcap_typegroup *is1_actionfield_set_typegroups[] = { + [1] = is1_x1_actionfield_set_typegroups, + [5] = NULL, +}; + static const struct vcap_typegroup *is2_actionfield_set_typegroups[] = { [2] = is2_x2_actionfield_set_typegroups, [1] = is2_x1_actionfield_set_typegroups, @@ -1463,16 +2714,33 @@ static const struct vcap_typegroup *is2_actionfield_set_typegroups[] = { /* Keyfieldset names */ static const char * const vcap_keyfield_set_names[] = { [VCAP_KFS_NO_VALUE] = "(None)", + [VCAP_KFS_5TUPLE_IP4] = "VCAP_KFS_5TUPLE_IP4", + [VCAP_KFS_5TUPLE_IP6] = "VCAP_KFS_5TUPLE_IP6", + [VCAP_KFS_7TUPLE] = "VCAP_KFS_7TUPLE", [VCAP_KFS_ARP] = "VCAP_KFS_ARP", + [VCAP_KFS_DBL_VID] = "VCAP_KFS_DBL_VID", + [VCAP_KFS_DMAC_VID] = "VCAP_KFS_DMAC_VID", + [VCAP_KFS_ETAG] = "VCAP_KFS_ETAG", [VCAP_KFS_IP4_OTHER] = "VCAP_KFS_IP4_OTHER", [VCAP_KFS_IP4_TCP_UDP] = "VCAP_KFS_IP4_TCP_UDP", + [VCAP_KFS_IP4_VID] = "VCAP_KFS_IP4_VID", [VCAP_KFS_IP6_OTHER] = "VCAP_KFS_IP6_OTHER", [VCAP_KFS_IP6_STD] = "VCAP_KFS_IP6_STD", [VCAP_KFS_IP6_TCP_UDP] = "VCAP_KFS_IP6_TCP_UDP", + [VCAP_KFS_IP6_VID] = "VCAP_KFS_IP6_VID", + [VCAP_KFS_IP_7TUPLE] = "VCAP_KFS_IP_7TUPLE", + [VCAP_KFS_ISDX] = "VCAP_KFS_ISDX", + [VCAP_KFS_LL_FULL] = "VCAP_KFS_LL_FULL", [VCAP_KFS_MAC_ETYPE] = "VCAP_KFS_MAC_ETYPE", [VCAP_KFS_MAC_LLC] = "VCAP_KFS_MAC_LLC", [VCAP_KFS_MAC_SNAP] = "VCAP_KFS_MAC_SNAP", + [VCAP_KFS_NORMAL] = "VCAP_KFS_NORMAL", + [VCAP_KFS_NORMAL_5TUPLE_IP4] = "VCAP_KFS_NORMAL_5TUPLE_IP4", + [VCAP_KFS_NORMAL_7TUPLE] = "VCAP_KFS_NORMAL_7TUPLE", + [VCAP_KFS_NORMAL_IP6] = "VCAP_KFS_NORMAL_IP6", [VCAP_KFS_OAM] = "VCAP_KFS_OAM", + [VCAP_KFS_PURE_5TUPLE_IP4] = "VCAP_KFS_PURE_5TUPLE_IP4", + [VCAP_KFS_RT] = "VCAP_KFS_RT", [VCAP_KFS_SMAC_SIP4] = "VCAP_KFS_SMAC_SIP4", [VCAP_KFS_SMAC_SIP6] = "VCAP_KFS_SMAC_SIP6", }; @@ -1481,16 +2749,42 @@ static const char * const vcap_keyfield_set_names[] = { static const char * const vcap_actionfield_set_names[] = { [VCAP_AFS_NO_VALUE] = "(None)", [VCAP_AFS_BASE_TYPE] = "VCAP_AFS_BASE_TYPE", + [VCAP_AFS_CLASSIFICATION] = "VCAP_AFS_CLASSIFICATION", + [VCAP_AFS_CLASS_REDUCED] = "VCAP_AFS_CLASS_REDUCED", + [VCAP_AFS_FULL] = "VCAP_AFS_FULL", + [VCAP_AFS_S1] = "VCAP_AFS_S1", [VCAP_AFS_SMAC_SIP] = "VCAP_AFS_SMAC_SIP", }; /* Keyfield names */ static const char * const vcap_keyfield_names[] = { [VCAP_KF_NO_VALUE] = "(None)", + [VCAP_KF_8021BR_ECID_BASE] = "8021BR_ECID_BASE", + [VCAP_KF_8021BR_ECID_EXT] = "8021BR_ECID_EXT", + [VCAP_KF_8021BR_E_TAGGED] = "8021BR_E_TAGGED", + [VCAP_KF_8021BR_GRP] = "8021BR_GRP", + [VCAP_KF_8021BR_IGR_ECID_BASE] = "8021BR_IGR_ECID_BASE", + 
[VCAP_KF_8021BR_IGR_ECID_EXT] = "8021BR_IGR_ECID_EXT", + [VCAP_KF_8021CB_R_TAGGED_IS] = "8021CB_R_TAGGED_IS", + [VCAP_KF_8021Q_DEI0] = "8021Q_DEI0", + [VCAP_KF_8021Q_DEI1] = "8021Q_DEI1", + [VCAP_KF_8021Q_DEI2] = "8021Q_DEI2", [VCAP_KF_8021Q_DEI_CLS] = "8021Q_DEI_CLS", + [VCAP_KF_8021Q_PCP0] = "8021Q_PCP0", + [VCAP_KF_8021Q_PCP1] = "8021Q_PCP1", + [VCAP_KF_8021Q_PCP2] = "8021Q_PCP2", [VCAP_KF_8021Q_PCP_CLS] = "8021Q_PCP_CLS", + [VCAP_KF_8021Q_TPID0] = "8021Q_TPID0", + [VCAP_KF_8021Q_TPID1] = "8021Q_TPID1", + [VCAP_KF_8021Q_TPID2] = "8021Q_TPID2", + [VCAP_KF_8021Q_VID0] = "8021Q_VID0", + [VCAP_KF_8021Q_VID1] = "8021Q_VID1", + [VCAP_KF_8021Q_VID2] = "8021Q_VID2", [VCAP_KF_8021Q_VID_CLS] = "8021Q_VID_CLS", + [VCAP_KF_8021Q_VLAN_DBL_TAGGED_IS] = "8021Q_VLAN_DBL_TAGGED_IS", [VCAP_KF_8021Q_VLAN_TAGGED_IS] = "8021Q_VLAN_TAGGED_IS", + [VCAP_KF_8021Q_VLAN_TAGS] = "8021Q_VLAN_TAGS", + [VCAP_KF_ACL_GRP_ID] = "ACL_GRP_ID", [VCAP_KF_ARP_ADDR_SPACE_OK_IS] = "ARP_ADDR_SPACE_OK_IS", [VCAP_KF_ARP_LEN_OK_IS] = "ARP_LEN_OK_IS", [VCAP_KF_ARP_OPCODE] = "ARP_OPCODE", @@ -1498,32 +2792,57 @@ static const char * const vcap_keyfield_names[] = { [VCAP_KF_ARP_PROTO_SPACE_OK_IS] = "ARP_PROTO_SPACE_OK_IS", [VCAP_KF_ARP_SENDER_MATCH_IS] = "ARP_SENDER_MATCH_IS", [VCAP_KF_ARP_TGT_MATCH_IS] = "ARP_TGT_MATCH_IS", + [VCAP_KF_COSID_CLS] = "COSID_CLS", + [VCAP_KF_ES0_ISDX_KEY_ENA] = "ES0_ISDX_KEY_ENA", [VCAP_KF_ETYPE] = "ETYPE", + [VCAP_KF_ETYPE_LEN_IS] = "ETYPE_LEN_IS", [VCAP_KF_HOST_MATCH] = "HOST_MATCH", + [VCAP_KF_IF_EGR_PORT_MASK] = "IF_EGR_PORT_MASK", + [VCAP_KF_IF_EGR_PORT_MASK_RNG] = "IF_EGR_PORT_MASK_RNG", [VCAP_KF_IF_IGR_PORT] = "IF_IGR_PORT", [VCAP_KF_IF_IGR_PORT_MASK] = "IF_IGR_PORT_MASK", + [VCAP_KF_IF_IGR_PORT_MASK_L3] = "IF_IGR_PORT_MASK_L3", + [VCAP_KF_IF_IGR_PORT_MASK_RNG] = "IF_IGR_PORT_MASK_RNG", + [VCAP_KF_IF_IGR_PORT_MASK_SEL] = "IF_IGR_PORT_MASK_SEL", + [VCAP_KF_IF_IGR_PORT_SEL] = "IF_IGR_PORT_SEL", [VCAP_KF_IP4_IS] = "IP4_IS", + [VCAP_KF_IP_MC_IS] = "IP_MC_IS", + [VCAP_KF_IP_PAYLOAD_5TUPLE] = "IP_PAYLOAD_5TUPLE", + [VCAP_KF_IP_PAYLOAD_S1_IP6] = "IP_PAYLOAD_S1_IP6", + [VCAP_KF_IP_SNAP_IS] = "IP_SNAP_IS", + [VCAP_KF_ISDX_CLS] = "ISDX_CLS", [VCAP_KF_ISDX_GT0_IS] = "ISDX_GT0_IS", [VCAP_KF_L2_BC_IS] = "L2_BC_IS", [VCAP_KF_L2_DMAC] = "L2_DMAC", [VCAP_KF_L2_FRM_TYPE] = "L2_FRM_TYPE", + [VCAP_KF_L2_FWD_IS] = "L2_FWD_IS", [VCAP_KF_L2_LLC] = "L2_LLC", + [VCAP_KF_L2_MAC] = "L2_MAC", [VCAP_KF_L2_MC_IS] = "L2_MC_IS", [VCAP_KF_L2_PAYLOAD0] = "L2_PAYLOAD0", [VCAP_KF_L2_PAYLOAD1] = "L2_PAYLOAD1", [VCAP_KF_L2_PAYLOAD2] = "L2_PAYLOAD2", + [VCAP_KF_L2_PAYLOAD_ETYPE] = "L2_PAYLOAD_ETYPE", [VCAP_KF_L2_SMAC] = "L2_SMAC", [VCAP_KF_L2_SNAP] = "L2_SNAP", [VCAP_KF_L3_DIP_EQ_SIP_IS] = "L3_DIP_EQ_SIP_IS", + [VCAP_KF_L3_DPL_CLS] = "L3_DPL_CLS", + [VCAP_KF_L3_DSCP] = "L3_DSCP", + [VCAP_KF_L3_DST_IS] = "L3_DST_IS", [VCAP_KF_L3_FRAGMENT] = "L3_FRAGMENT", + [VCAP_KF_L3_FRAGMENT_TYPE] = "L3_FRAGMENT_TYPE", + [VCAP_KF_L3_FRAG_INVLD_L4_LEN] = "L3_FRAG_INVLD_L4_LEN", [VCAP_KF_L3_FRAG_OFS_GT0] = "L3_FRAG_OFS_GT0", [VCAP_KF_L3_IP4_DIP] = "L3_IP4_DIP", [VCAP_KF_L3_IP4_SIP] = "L3_IP4_SIP", [VCAP_KF_L3_IP6_DIP] = "L3_IP6_DIP", + [VCAP_KF_L3_IP6_DIP_MSB] = "L3_IP6_DIP_MSB", [VCAP_KF_L3_IP6_SIP] = "L3_IP6_SIP", + [VCAP_KF_L3_IP6_SIP_MSB] = "L3_IP6_SIP_MSB", [VCAP_KF_L3_IP_PROTO] = "L3_IP_PROTO", [VCAP_KF_L3_OPTIONS_IS] = "L3_OPTIONS_IS", [VCAP_KF_L3_PAYLOAD] = "L3_PAYLOAD", + [VCAP_KF_L3_RT_IS] = "L3_RT_IS", [VCAP_KF_L3_TOS] = "L3_TOS", [VCAP_KF_L3_TTL_GT0] = "L3_TTL_GT0", [VCAP_KF_L4_1588_DOM] = "L4_1588_DOM", @@ -1531,6 +2850,7 @@ static const 
char * const vcap_keyfield_names[] = { [VCAP_KF_L4_ACK] = "L4_ACK", [VCAP_KF_L4_DPORT] = "L4_DPORT", [VCAP_KF_L4_FIN] = "L4_FIN", + [VCAP_KF_L4_PAYLOAD] = "L4_PAYLOAD", [VCAP_KF_L4_PSH] = "L4_PSH", [VCAP_KF_L4_RNG] = "L4_RNG", [VCAP_KF_L4_RST] = "L4_RST", @@ -1540,7 +2860,11 @@ static const char * const vcap_keyfield_names[] = { [VCAP_KF_L4_SYN] = "L4_SYN", [VCAP_KF_L4_URG] = "L4_URG", [VCAP_KF_LOOKUP_FIRST_IS] = "LOOKUP_FIRST_IS", + [VCAP_KF_LOOKUP_GEN_IDX] = "LOOKUP_GEN_IDX", + [VCAP_KF_LOOKUP_GEN_IDX_SEL] = "LOOKUP_GEN_IDX_SEL", + [VCAP_KF_LOOKUP_INDEX] = "LOOKUP_INDEX", [VCAP_KF_LOOKUP_PAG] = "LOOKUP_PAG", + [VCAP_KF_MIRROR_PROBE] = "MIRROR_PROBE", [VCAP_KF_OAM_CCM_CNTS_EQ0] = "OAM_CCM_CNTS_EQ0", [VCAP_KF_OAM_DETECTED] = "OAM_DETECTED", [VCAP_KF_OAM_FLAGS] = "OAM_FLAGS", @@ -1549,7 +2873,12 @@ static const char * const vcap_keyfield_names[] = { [VCAP_KF_OAM_OPCODE] = "OAM_OPCODE", [VCAP_KF_OAM_VER] = "OAM_VER", [VCAP_KF_OAM_Y1731_IS] = "OAM_Y1731_IS", + [VCAP_KF_PROT_ACTIVE] = "PROT_ACTIVE", + [VCAP_KF_RT_FRMID] = "RT_FRMID", + [VCAP_KF_RT_TYPE] = "RT_TYPE", + [VCAP_KF_RT_VLAN_IDX] = "RT_VLAN_IDX", [VCAP_KF_TCP_IS] = "TCP_IS", + [VCAP_KF_TCP_UDP_IS] = "TCP_UDP_IS", [VCAP_KF_TYPE] = "TYPE", }; @@ -1557,24 +2886,95 @@ static const char * const vcap_keyfield_names[] = { static const char * const vcap_actionfield_names[] = { [VCAP_AF_NO_VALUE] = "(None)", [VCAP_AF_ACL_ID] = "ACL_ID", + [VCAP_AF_CLS_VID_SEL] = "CLS_VID_SEL", + [VCAP_AF_CNT_ID] = "CNT_ID", + [VCAP_AF_COPY_PORT_NUM] = "COPY_PORT_NUM", + [VCAP_AF_COPY_QUEUE_NUM] = "COPY_QUEUE_NUM", [VCAP_AF_CPU_COPY_ENA] = "CPU_COPY_ENA", [VCAP_AF_CPU_QUEUE_NUM] = "CPU_QUEUE_NUM", + [VCAP_AF_CUSTOM_ACE_TYPE_ENA] = "CUSTOM_ACE_TYPE_ENA", + [VCAP_AF_DEI_ENA] = "DEI_ENA", + [VCAP_AF_DEI_VAL] = "DEI_VAL", + [VCAP_AF_DLR_SEL] = "DLR_SEL", + [VCAP_AF_DP_ENA] = "DP_ENA", + [VCAP_AF_DP_VAL] = "DP_VAL", + [VCAP_AF_DSCP_ENA] = "DSCP_ENA", + [VCAP_AF_DSCP_VAL] = "DSCP_VAL", + [VCAP_AF_ES2_REW_CMD] = "ES2_REW_CMD", [VCAP_AF_FWD_KILL_ENA] = "FWD_KILL_ENA", + [VCAP_AF_FWD_MODE] = "FWD_MODE", [VCAP_AF_HIT_ME_ONCE] = "HIT_ME_ONCE", [VCAP_AF_HOST_MATCH] = "HOST_MATCH", + [VCAP_AF_IGNORE_PIPELINE_CTRL] = "IGNORE_PIPELINE_CTRL", + [VCAP_AF_INTR_ENA] = "INTR_ENA", + [VCAP_AF_ISDX_ADD_REPLACE_SEL] = "ISDX_ADD_REPLACE_SEL", + [VCAP_AF_ISDX_ADD_VAL] = "ISDX_ADD_VAL", [VCAP_AF_ISDX_ENA] = "ISDX_ENA", + [VCAP_AF_ISDX_REPLACE_ENA] = "ISDX_REPLACE_ENA", + [VCAP_AF_ISDX_VAL] = "ISDX_VAL", [VCAP_AF_LRN_DIS] = "LRN_DIS", + [VCAP_AF_MAP_IDX] = "MAP_IDX", + [VCAP_AF_MAP_KEY] = "MAP_KEY", + [VCAP_AF_MAP_LOOKUP_SEL] = "MAP_LOOKUP_SEL", [VCAP_AF_MASK_MODE] = "MASK_MODE", + [VCAP_AF_MATCH_ID] = "MATCH_ID", + [VCAP_AF_MATCH_ID_MASK] = "MATCH_ID_MASK", [VCAP_AF_MIRROR_ENA] = "MIRROR_ENA", + [VCAP_AF_MIRROR_PROBE] = "MIRROR_PROBE", + [VCAP_AF_MIRROR_PROBE_ID] = "MIRROR_PROBE_ID", + [VCAP_AF_MRP_SEL] = "MRP_SEL", + [VCAP_AF_NXT_IDX] = "NXT_IDX", + [VCAP_AF_NXT_IDX_CTRL] = "NXT_IDX_CTRL", + [VCAP_AF_OAM_SEL] = "OAM_SEL", + [VCAP_AF_PAG_OVERRIDE_MASK] = "PAG_OVERRIDE_MASK", + [VCAP_AF_PAG_VAL] = "PAG_VAL", + [VCAP_AF_PCP_ENA] = "PCP_ENA", + [VCAP_AF_PCP_VAL] = "PCP_VAL", + [VCAP_AF_PIPELINE_FORCE_ENA] = "PIPELINE_FORCE_ENA", + [VCAP_AF_PIPELINE_PT] = "PIPELINE_PT", [VCAP_AF_POLICE_ENA] = "POLICE_ENA", [VCAP_AF_POLICE_IDX] = "POLICE_IDX", + [VCAP_AF_POLICE_REMARK] = "POLICE_REMARK", [VCAP_AF_POLICE_VCAP_ONLY] = "POLICE_VCAP_ONLY", [VCAP_AF_PORT_MASK] = "PORT_MASK", + [VCAP_AF_QOS_ENA] = "QOS_ENA", + [VCAP_AF_QOS_VAL] = "QOS_VAL", [VCAP_AF_REW_OP] = "REW_OP", + [VCAP_AF_RT_DIS] = 
"RT_DIS", + [VCAP_AF_SFID_ENA] = "SFID_ENA", + [VCAP_AF_SFID_VAL] = "SFID_VAL", + [VCAP_AF_SGID_ENA] = "SGID_ENA", + [VCAP_AF_SGID_VAL] = "SGID_VAL", + [VCAP_AF_TYPE] = "TYPE", + [VCAP_AF_VID_REPLACE_ENA] = "VID_REPLACE_ENA", + [VCAP_AF_VID_VAL] = "VID_VAL", + [VCAP_AF_VLAN_POP_CNT] = "VLAN_POP_CNT", + [VCAP_AF_VLAN_POP_CNT_ENA] = "VLAN_POP_CNT_ENA", }; /* VCAPs */ const struct vcap_info lan966x_vcaps[] = { + [VCAP_TYPE_IS1] = { + .name = "is1", + .rows = 192, + .sw_count = 4, + .sw_width = 96, + .sticky_width = 32, + .act_width = 123, + .default_cnt = 0, + .require_cnt_dis = 1, + .version = 1, + .keyfield_set = is1_keyfield_set, + .keyfield_set_size = ARRAY_SIZE(is1_keyfield_set), + .actionfield_set = is1_actionfield_set, + .actionfield_set_size = ARRAY_SIZE(is1_actionfield_set), + .keyfield_set_map = is1_keyfield_set_map, + .keyfield_set_map_size = is1_keyfield_set_map_size, + .actionfield_set_map = is1_actionfield_set_map, + .actionfield_set_map_size = is1_actionfield_set_map_size, + .keyfield_set_typegroups = is1_keyfield_set_typegroups, + .actionfield_set_typegroups = is1_actionfield_set_typegroups, + }, [VCAP_TYPE_IS2] = { .name = "is2", .rows = 64, @@ -1600,7 +3000,7 @@ const struct vcap_info lan966x_vcaps[] = { const struct vcap_statistics lan966x_vcap_stats = { .name = "lan966x", - .count = 1, + .count = 2, .keyfield_set_names = vcap_keyfield_set_names, .actionfield_set_names = vcap_actionfield_set_names, .keyfield_names = vcap_keyfield_names, diff --git a/drivers/net/ethernet/microchip/vcap/vcap_ag_api.h b/drivers/net/ethernet/microchip/vcap/vcap_ag_api.h index 0844fcaeee689..a556c4419986e 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_ag_api.h +++ b/drivers/net/ethernet/microchip/vcap/vcap_ag_api.h @@ -3,8 +3,8 @@ * Microchip VCAP API */ -/* This file is autogenerated by cml-utils 2023-02-10 11:15:56 +0100. - * Commit ID: c30fb4bf0281cd4a7133bdab6682f9e43c872ada +/* This file is autogenerated by cml-utils 2023-02-16 11:41:14 +0100. 
+ * Commit ID: be85f176b3a151fa748dcaf97c8824a5c2e065f3 */ #ifndef __VCAP_AG_API__ @@ -14,6 +14,7 @@ enum vcap_type { VCAP_TYPE_ES0, VCAP_TYPE_ES2, VCAP_TYPE_IS0, + VCAP_TYPE_IS1, VCAP_TYPE_IS2, VCAP_TYPE_MAX }; @@ -21,7 +22,12 @@ enum vcap_type { /* Keyfieldset names with origin information */ enum vcap_keyfield_set { VCAP_KFS_NO_VALUE, /* initial value */ + VCAP_KFS_5TUPLE_IP4, /* lan966x is1 X2 */ + VCAP_KFS_5TUPLE_IP6, /* lan966x is1 X4 */ + VCAP_KFS_7TUPLE, /* lan966x is1 X4 */ VCAP_KFS_ARP, /* sparx5 is2 X6, sparx5 es2 X6, lan966x is2 X2 */ + VCAP_KFS_DBL_VID, /* lan966x is1 X1 */ + VCAP_KFS_DMAC_VID, /* lan966x is1 X1 */ VCAP_KFS_ETAG, /* sparx5 is0 X2 */ VCAP_KFS_IP4_OTHER, /* sparx5 is2 X6, sparx5 es2 X6, lan966x is2 X2 */ VCAP_KFS_IP4_TCP_UDP, /* sparx5 is2 X6, sparx5 es2 X6, lan966x is2 X2 */ @@ -36,10 +42,13 @@ enum vcap_keyfield_set { VCAP_KFS_MAC_ETYPE, /* sparx5 is2 X6, sparx5 es2 X6, lan966x is2 X2 */ VCAP_KFS_MAC_LLC, /* lan966x is2 X2 */ VCAP_KFS_MAC_SNAP, /* lan966x is2 X2 */ + VCAP_KFS_NORMAL, /* lan966x is1 X2 */ VCAP_KFS_NORMAL_5TUPLE_IP4, /* sparx5 is0 X6 */ VCAP_KFS_NORMAL_7TUPLE, /* sparx5 is0 X12 */ + VCAP_KFS_NORMAL_IP6, /* lan966x is1 X4 */ VCAP_KFS_OAM, /* lan966x is2 X2 */ VCAP_KFS_PURE_5TUPLE_IP4, /* sparx5 is0 X3 */ + VCAP_KFS_RT, /* lan966x is1 X1 */ VCAP_KFS_SMAC_SIP4, /* lan966x is2 X1 */ VCAP_KFS_SMAC_SIP6, /* lan966x is2 X2 */ }; @@ -61,17 +70,20 @@ enum vcap_keyfield_set { * Used by 802.1BR Bridge Port Extension in an E-Tag * VCAP_KF_8021BR_IGR_ECID_EXT: W8, sparx5: is0 * Used by 802.1BR Bridge Port Extension in an E-Tag - * VCAP_KF_8021Q_DEI0: W1, sparx5: is0 + * VCAP_KF_8021CB_R_TAGGED_IS: W1, lan966x: is1 + * Set if frame contains an RTAG: IEEE 802.1CB (FRER Redundancy tag, Ethertype + * 0xf1c1) + * VCAP_KF_8021Q_DEI0: W1, sparx5: is0, lan966x: is1 * First DEI in multiple vlan tags (outer tag or default port tag) - * VCAP_KF_8021Q_DEI1: W1, sparx5: is0 + * VCAP_KF_8021Q_DEI1: W1, sparx5: is0, lan966x: is1 * Second DEI in multiple vlan tags (inner tag) * VCAP_KF_8021Q_DEI2: W1, sparx5: is0 * Third DEI in multiple vlan tags (not always available) * VCAP_KF_8021Q_DEI_CLS: W1, sparx5: is2/es2, lan966x: is2 * Classified DEI - * VCAP_KF_8021Q_PCP0: W3, sparx5: is0 + * VCAP_KF_8021Q_PCP0: W3, sparx5: is0, lan966x: is1 * First PCP in multiple vlan tags (outer tag or default port tag) - * VCAP_KF_8021Q_PCP1: W3, sparx5: is0 + * VCAP_KF_8021Q_PCP1: W3, sparx5: is0, lan966x: is1 * Second PCP in multiple vlan tags (inner tag) * VCAP_KF_8021Q_PCP2: W3, sparx5: is0 * Third PCP in multiple vlan tags (not always available) @@ -79,22 +91,24 @@ enum vcap_keyfield_set { * Classified PCP * VCAP_KF_8021Q_TPID: W3, sparx5: es0 * TPID for outer tag: 0: Customer TPID 1: Service TPID (88A8 or programmable) - * VCAP_KF_8021Q_TPID0: W3, sparx5: is0 + * VCAP_KF_8021Q_TPID0: sparx5 is0 W3, lan966x is1 W1 * First TPIC in multiple vlan tags (outer tag or default port tag) - * VCAP_KF_8021Q_TPID1: W3, sparx5: is0 + * VCAP_KF_8021Q_TPID1: sparx5 is0 W3, lan966x is1 W1 * Second TPID in multiple vlan tags (inner tag) * VCAP_KF_8021Q_TPID2: W3, sparx5: is0 * Third TPID in multiple vlan tags (not always available) - * VCAP_KF_8021Q_VID0: W12, sparx5: is0 + * VCAP_KF_8021Q_VID0: W12, sparx5: is0, lan966x: is1 * First VID in multiple vlan tags (outer tag or default port tag) - * VCAP_KF_8021Q_VID1: W12, sparx5: is0 + * VCAP_KF_8021Q_VID1: W12, sparx5: is0, lan966x: is1 * Second VID in multiple vlan tags (inner tag) * VCAP_KF_8021Q_VID2: W12, sparx5: is0 * Third VID in multiple vlan tags 
(not always available) * VCAP_KF_8021Q_VID_CLS: sparx5 is2 W13, sparx5 es0 W13, sparx5 es2 W13, * lan966x is2 W12 * Classified VID - * VCAP_KF_8021Q_VLAN_TAGGED_IS: W1, sparx5: is2/es2, lan966x: is2 + * VCAP_KF_8021Q_VLAN_DBL_TAGGED_IS: W1, lan966x: is1 + * Set if frame has two or more Q-tags. Independent of port VLAN awareness + * VCAP_KF_8021Q_VLAN_TAGGED_IS: W1, sparx5: is2/es2, lan966x: is1/is2 * Sparx5: Set if frame was received with a VLAN tag, LAN966x: Set if frame has * one or more Q-tags. Independent of port VLAN awareness * VCAP_KF_8021Q_VLAN_TAGS: W3, sparx5: is0 @@ -120,9 +134,9 @@ enum vcap_keyfield_set { * Class of service * VCAP_KF_ES0_ISDX_KEY_ENA: W1, sparx5: es2 * The value taken from the IFH .FWD.ES0_ISDX_KEY_ENA - * VCAP_KF_ETYPE: W16, sparx5: is0/is2/es2, lan966x: is2 + * VCAP_KF_ETYPE: W16, sparx5: is0/is2/es2, lan966x: is1/is2 * Ethernet type - * VCAP_KF_ETYPE_LEN_IS: W1, sparx5: is0/is2/es2 + * VCAP_KF_ETYPE_LEN_IS: W1, sparx5: is0/is2/es2, lan966x: is1 * Set if frame has EtherType >= 0x600 * VCAP_KF_HOST_MATCH: W1, lan966x: is2 * The action from the SMAC_SIP4 or SMAC_SIP6 lookups. Used for IP source @@ -134,11 +148,12 @@ enum vcap_keyfield_set { * CPU queue) * VCAP_KF_IF_EGR_PORT_NO: W7, sparx5: es0 * Egress port number - * VCAP_KF_IF_IGR_PORT: sparx5 is0 W7, sparx5 es2 W9, lan966x is2 W4 + * VCAP_KF_IF_IGR_PORT: sparx5 is0 W7, sparx5 es2 W9, lan966x is1 W3, lan966x + * is2 W4 * Sparx5: Logical ingress port number retrieved from * ANA_CL::PORT_ID_CFG.LPORT_NUM or ERLEG, LAN966x: ingress port nunmber * VCAP_KF_IF_IGR_PORT_MASK: sparx5 is0 W65, sparx5 is2 W32, sparx5 is2 W65, - * lan966x is2 W9 + * lan966x is1 W9, lan966x is2 W9 * Ingress port mask, one bit per port/erleg * VCAP_KF_IF_IGR_PORT_MASK_L3: W1, sparx5: is2 * If set, IF_IGR_PORT_MASK, IF_IGR_PORT_MASK_RNG, and IF_IGR_PORT_MASK_SEL are @@ -151,24 +166,26 @@ enum vcap_keyfield_set { * Mapping: 0: DEFAULT 1: LOOPBACK 2: MASQUERADE 3: CPU_VD * VCAP_KF_IF_IGR_PORT_SEL: W1, sparx5: es2 * Selector for IF_IGR_PORT: physical port number or ERLEG - * VCAP_KF_IP4_IS: W1, sparx5: is0/is2/es2, lan966x: is2 + * VCAP_KF_IP4_IS: W1, sparx5: is0/is2/es2, lan966x: is1/is2 * Set if frame has EtherType = 0x800 and IP version = 4 - * VCAP_KF_IP_MC_IS: W1, sparx5: is0 + * VCAP_KF_IP_MC_IS: W1, sparx5: is0, lan966x: is1 * Set if frame is IPv4 frame and frame's destination MAC address is an IPv4 * multicast address (0x01005E0 /25). Set if frame is IPv6 frame and frame's * destination MAC address is an IPv6 multicast address (0x3333/16). - * VCAP_KF_IP_PAYLOAD_5TUPLE: W32, sparx5: is0 + * VCAP_KF_IP_PAYLOAD_5TUPLE: W32, sparx5: is0, lan966x: is1 * Payload bytes after IP header - * VCAP_KF_IP_SNAP_IS: W1, sparx5: is0 + * VCAP_KF_IP_PAYLOAD_S1_IP6: W112, lan966x: is1 + * Payload after IPv6 header + * VCAP_KF_IP_SNAP_IS: W1, sparx5: is0, lan966x: is1 * Set if frame is IPv4, IPv6, or SNAP frame * VCAP_KF_ISDX_CLS: W12, sparx5: is2/es0/es2 * Classified ISDX * VCAP_KF_ISDX_GT0_IS: W1, sparx5: is2/es0/es2, lan966x: is2 * Set if classified ISDX > 0 - * VCAP_KF_L2_BC_IS: W1, sparx5: is0/is2/es2, lan966x: is2 + * VCAP_KF_L2_BC_IS: W1, sparx5: is0/is2/es2, lan966x: is1/is2 * Set if frame's destination MAC address is the broadcast address * (FF-FF-FF-FF-FF-FF). 
- * VCAP_KF_L2_DMAC: W48, sparx5: is0/is2/es2, lan966x: is2 + * VCAP_KF_L2_DMAC: W48, sparx5: is0/is2/es2, lan966x: is1/is2 * Destination MAC address * VCAP_KF_L2_FRM_TYPE: W4, lan966x: is2 * Frame subtype for specific EtherTypes (MRP, DLR) @@ -176,7 +193,9 @@ enum vcap_keyfield_set { * Set if the frame is allowed to be forwarded to front ports * VCAP_KF_L2_LLC: W40, lan966x: is2 * LLC header and data after up to two VLAN tags and the type/length field - * VCAP_KF_L2_MC_IS: W1, sparx5: is0/is2/es2, lan966x: is2 + * VCAP_KF_L2_MAC: W48, lan966x: is1 + * MAC address (FIRST=1: SMAC, FIRST=0: DMAC) + * VCAP_KF_L2_MC_IS: W1, sparx5: is0/is2/es2, lan966x: is1/is2 * Set if frame's destination MAC address is a multicast address (bit 40 = 1). * VCAP_KF_L2_PAYLOAD0: W16, lan966x: is2 * Payload bytes 0-1 after the frame's EtherType @@ -188,7 +207,7 @@ enum vcap_keyfield_set { * specifically for PTP frames. * VCAP_KF_L2_PAYLOAD_ETYPE: W64, sparx5: is2/es2 * Byte 0-7 of L2 payload after Type/Len field and overloading for OAM - * VCAP_KF_L2_SMAC: W48, sparx5: is0/is2/es2, lan966x: is2 + * VCAP_KF_L2_SMAC: W48, sparx5: is0/is2/es2, lan966x: is1/is2 * Source MAC address * VCAP_KF_L2_SNAP: W40, lan966x: is2 * SNAP header after LLC header (AA-AA-03) @@ -196,32 +215,38 @@ enum vcap_keyfield_set { * Set if Src IP matches Dst IP address * VCAP_KF_L3_DPL_CLS: W1, sparx5: es0/es2 * The frames drop precedence level - * VCAP_KF_L3_DSCP: W6, sparx5: is0 + * VCAP_KF_L3_DSCP: W6, sparx5: is0, lan966x: is1 * Frame's DSCP value * VCAP_KF_L3_DST_IS: W1, sparx5: is2 * Set if lookup is done for egress router leg - * VCAP_KF_L3_FRAGMENT: W1, lan966x: is2 + * VCAP_KF_L3_FRAGMENT: W1, lan966x: is1/is2 * Set if IPv4 frame is fragmented * VCAP_KF_L3_FRAGMENT_TYPE: W2, sparx5: is0/is2/es2 * L3 Fragmentation type (none, initial, suspicious, valid follow up) * VCAP_KF_L3_FRAG_INVLD_L4_LEN: W1, sparx5: is0/is2 * Set if frame's L4 length is less than ANA_CL:COMMON:CLM_FRAGMENT_CFG.L4_MIN_L * EN - * VCAP_KF_L3_FRAG_OFS_GT0: W1, lan966x: is2 + * VCAP_KF_L3_FRAG_OFS_GT0: W1, lan966x: is1/is2 * Set if IPv4 frame is fragmented and it is not the first fragment - * VCAP_KF_L3_IP4_DIP: W32, sparx5: is0/is2/es2, lan966x: is2 + * VCAP_KF_L3_IP4_DIP: W32, sparx5: is0/is2/es2, lan966x: is1/is2 * Destination IPv4 Address - * VCAP_KF_L3_IP4_SIP: W32, sparx5: is0/is2/es2, lan966x: is2 + * VCAP_KF_L3_IP4_SIP: W32, sparx5: is0/is2/es2, lan966x: is1/is2 * Source IPv4 Address - * VCAP_KF_L3_IP6_DIP: W128, sparx5: is0/is2/es2, lan966x: is2 + * VCAP_KF_L3_IP6_DIP: sparx5 is0 W128, sparx5 is2 W128, sparx5 es2 W128, + * lan966x is1 W64, lan966x is1 W128, lan966x is2 W128 * Sparx5: Full IPv6 DIP, LAN966x: Either Full IPv6 DIP or a subset depending on * frame type - * VCAP_KF_L3_IP6_SIP: W128, sparx5: is0/is2/es2, lan966x: is2 + * VCAP_KF_L3_IP6_DIP_MSB: W16, lan966x: is1 + * MS 16bits of IPv6 DIP + * VCAP_KF_L3_IP6_SIP: sparx5 is0 W128, sparx5 is2 W128, sparx5 es2 W128, + * lan966x is1 W128, lan966x is1 W64, lan966x is2 W128 * Sparx5: Full IPv6 SIP, LAN966x: Either Full IPv6 SIP or a subset depending on * frame type - * VCAP_KF_L3_IP_PROTO: W8, sparx5: is0/is2/es2, lan966x: is2 + * VCAP_KF_L3_IP6_SIP_MSB: W16, lan966x: is1 + * MS 16bits of IPv6 DIP + * VCAP_KF_L3_IP_PROTO: W8, sparx5: is0/is2/es2, lan966x: is1/is2 * IPv4 frames: IP protocol. 
IPv6 frames: Next header, same as for IPV4 - * VCAP_KF_L3_OPTIONS_IS: W1, sparx5: is0/is2/es2, lan966x: is2 + * VCAP_KF_L3_OPTIONS_IS: W1, sparx5: is0/is2/es2, lan966x: is1/is2 * Set if IPv4 frame contains options (IP len > 5) * VCAP_KF_L3_PAYLOAD: sparx5 is2 W96, sparx5 is2 W40, sparx5 es2 W96, sparx5 * es2 W40, lan966x is2 W56 @@ -254,7 +279,8 @@ enum vcap_keyfield_set { * VCAP_KF_L4_PSH: W1, sparx5: is2/es2, lan966x: is2 * Sparx5: TCP flag PSH, LAN966x: TCP: TCP flag PSH. PTP over UDP: flagField bit * 1 (twoStepFlag) - * VCAP_KF_L4_RNG: sparx5 is0 W8, sparx5 is2 W16, sparx5 es2 W16, lan966x is2 W8 + * VCAP_KF_L4_RNG: sparx5 is0 W8, sparx5 is2 W16, sparx5 es2 W16, lan966x is1 + * W8, lan966x is2 W8 * Range checker bitmask (one for each range checker). Input into range checkers * is taken from classified results (VID, DSCP) and frame (SPORT, DPORT, ETYPE, * outer VID, inner VID) @@ -264,7 +290,7 @@ enum vcap_keyfield_set { * VCAP_KF_L4_SEQUENCE_EQ0_IS: W1, sparx5: is2/es2, lan966x: is2 * Set if TCP sequence number is 0, LAN966x: Overlayed with PTP over UDP: * messageType bit 0 - * VCAP_KF_L4_SPORT: W16, sparx5: is0/is2/es2, lan966x: is2 + * VCAP_KF_L4_SPORT: W16, sparx5: is0/is2/es2, lan966x: is1/is2 * TCP/UDP source port * VCAP_KF_L4_SPORT_EQ_DPORT_IS: W1, sparx5: is2/es2, lan966x: is2 * Set if UDP or TCP source port equals UDP or TCP destination port @@ -274,13 +300,16 @@ enum vcap_keyfield_set { * VCAP_KF_L4_URG: W1, sparx5: is2/es2, lan966x: is2 * Sparx5: TCP flag URG, LAN966x: TCP: TCP flag URG. PTP over UDP: flagField bit * 7 (reserved) - * VCAP_KF_LOOKUP_FIRST_IS: W1, sparx5: is0/is2/es2, lan966x: is2 + * VCAP_KF_LOOKUP_FIRST_IS: W1, sparx5: is0/is2/es2, lan966x: is1/is2 * Selects between entries relevant for first and second lookup. Set for first * lookup, cleared for second lookup. 
* VCAP_KF_LOOKUP_GEN_IDX: W12, sparx5: is0 * Generic index - for chaining CLM instances * VCAP_KF_LOOKUP_GEN_IDX_SEL: W2, sparx5: is0 * Select the mode of the Generic Index + * VCAP_KF_LOOKUP_INDEX: W2, lan966x: is1 + * 0: First lookup, 1: Second lookup, 2: Third lookup, Similar to VCAP_KF_FIRST + * but with extra info * VCAP_KF_LOOKUP_PAG: W8, sparx5: is2, lan966x: is2 * Classified Policy Association Group: chains rules from IS1/CLM to IS2 * VCAP_KF_MIRROR_PROBE: W2, sparx5: es2 @@ -303,14 +332,22 @@ enum vcap_keyfield_set { * Set if frame's EtherType = 0x8902 * VCAP_KF_PROT_ACTIVE: W1, sparx5: es0/es2 * Protection is active - * VCAP_KF_TCP_IS: W1, sparx5: is0/is2/es2, lan966x: is2 + * VCAP_KF_RT_FRMID: W32, lan966x: is1 + * Profinet or OPC-UA FrameId + * VCAP_KF_RT_TYPE: W2, lan966x: is1 + * Encoding of frame's EtherType: 0: Other, 1: Profinet, 2: OPC-UA, 3: Custom + * (ANA::RT_CUSTOM) + * VCAP_KF_RT_VLAN_IDX: W3, lan966x: is1 + * Real-time VLAN index from ANA::RT_VLAN_PCP + * VCAP_KF_TCP_IS: W1, sparx5: is0/is2/es2, lan966x: is1/is2 * Set if frame is IPv4 TCP frame (IP protocol = 6) or IPv6 TCP frames (Next * header = 6) - * VCAP_KF_TCP_UDP_IS: W1, sparx5: is0/is2/es2 + * VCAP_KF_TCP_UDP_IS: W1, sparx5: is0/is2/es2, lan966x: is1 * Set if frame is IPv4/IPv6 TCP or UDP frame (IP protocol/next header equals 6 * or 17) * VCAP_KF_TYPE: sparx5 is0 W2, sparx5 is0 W1, sparx5 is2 W4, sparx5 is2 W2, - * sparx5 es0 W1, sparx5 es2 W3, lan966x is2 W4, lan966x is2 W2 + * sparx5 es0 W1, sparx5 es2 W3, lan966x is1 W1, lan966x is1 W2, lan966x is2 W4, + * lan966x is2 W2 * Keyset type id - set by the API */ @@ -323,6 +360,7 @@ enum vcap_key_field { VCAP_KF_8021BR_GRP, VCAP_KF_8021BR_IGR_ECID_BASE, VCAP_KF_8021BR_IGR_ECID_EXT, + VCAP_KF_8021CB_R_TAGGED_IS, VCAP_KF_8021Q_DEI0, VCAP_KF_8021Q_DEI1, VCAP_KF_8021Q_DEI2, @@ -339,6 +377,7 @@ enum vcap_key_field { VCAP_KF_8021Q_VID1, VCAP_KF_8021Q_VID2, VCAP_KF_8021Q_VID_CLS, + VCAP_KF_8021Q_VLAN_DBL_TAGGED_IS, VCAP_KF_8021Q_VLAN_TAGGED_IS, VCAP_KF_8021Q_VLAN_TAGS, VCAP_KF_ACL_GRP_ID, @@ -366,6 +405,7 @@ enum vcap_key_field { VCAP_KF_IP4_IS, VCAP_KF_IP_MC_IS, VCAP_KF_IP_PAYLOAD_5TUPLE, + VCAP_KF_IP_PAYLOAD_S1_IP6, VCAP_KF_IP_SNAP_IS, VCAP_KF_ISDX_CLS, VCAP_KF_ISDX_GT0_IS, @@ -374,6 +414,7 @@ enum vcap_key_field { VCAP_KF_L2_FRM_TYPE, VCAP_KF_L2_FWD_IS, VCAP_KF_L2_LLC, + VCAP_KF_L2_MAC, VCAP_KF_L2_MC_IS, VCAP_KF_L2_PAYLOAD0, VCAP_KF_L2_PAYLOAD1, @@ -392,7 +433,9 @@ enum vcap_key_field { VCAP_KF_L3_IP4_DIP, VCAP_KF_L3_IP4_SIP, VCAP_KF_L3_IP6_DIP, + VCAP_KF_L3_IP6_DIP_MSB, VCAP_KF_L3_IP6_SIP, + VCAP_KF_L3_IP6_SIP_MSB, VCAP_KF_L3_IP_PROTO, VCAP_KF_L3_OPTIONS_IS, VCAP_KF_L3_PAYLOAD, @@ -416,6 +459,7 @@ enum vcap_key_field { VCAP_KF_LOOKUP_FIRST_IS, VCAP_KF_LOOKUP_GEN_IDX, VCAP_KF_LOOKUP_GEN_IDX_SEL, + VCAP_KF_LOOKUP_INDEX, VCAP_KF_LOOKUP_PAG, VCAP_KF_MIRROR_PROBE, VCAP_KF_OAM_CCM_CNTS_EQ0, @@ -427,6 +471,9 @@ enum vcap_key_field { VCAP_KF_OAM_VER, VCAP_KF_OAM_Y1731_IS, VCAP_KF_PROT_ACTIVE, + VCAP_KF_RT_FRMID, + VCAP_KF_RT_TYPE, + VCAP_KF_RT_VLAN_IDX, VCAP_KF_TCP_IS, VCAP_KF_TCP_UDP_IS, VCAP_KF_TYPE, @@ -440,6 +487,7 @@ enum vcap_actionfield_set { VCAP_AFS_CLASS_REDUCED, /* sparx5 is0 X1 */ VCAP_AFS_ES0, /* sparx5 es0 X1 */ VCAP_AFS_FULL, /* sparx5 is0 X3 */ + VCAP_AFS_S1, /* lan966x is1 X1 */ VCAP_AFS_SMAC_SIP, /* lan966x is2 X1 */ }; @@ -470,23 +518,31 @@ enum vcap_actionfield_set { * CPU extraction queue. Used when FWD_SEL >0 and PIPELINE_ACT = XTR. * VCAP_AF_CPU_QUEUE_NUM: W3, sparx5: is2/es2, lan966x: is2 * CPU queue number. Used when CPU_COPY_ENA is set. 
+ * VCAP_AF_CUSTOM_ACE_TYPE_ENA: W4, lan966x: is1 + * Enables use of custom keys in IS2. Bits 3:2 control second lookup in IS2 + * while bits 1:0 control first lookup. Encoding per lookup: 0: Disabled. 1: + * Extract 40 bytes after position corresponding to the location of the IPv4 + * header and use as key. 2: Extract 40 bytes after SMAC and use as key * VCAP_AF_DEI_A_VAL: W1, sparx5: es0 * DEI used in ES0 tag A. See TAG_A_DEI_SEL. * VCAP_AF_DEI_B_VAL: W1, sparx5: es0 * DEI used in ES0 tag B. See TAG_B_DEI_SEL. * VCAP_AF_DEI_C_VAL: W1, sparx5: es0 * DEI used in ES0 tag C. See TAG_C_DEI_SEL. - * VCAP_AF_DEI_ENA: W1, sparx5: is0 + * VCAP_AF_DEI_ENA: W1, sparx5: is0, lan966x: is1 * If set, use DEI_VAL as classified DEI value. Otherwise, DEI from basic * classification is used - * VCAP_AF_DEI_VAL: W1, sparx5: is0 + * VCAP_AF_DEI_VAL: W1, sparx5: is0, lan966x: is1 * See DEI_ENA - * VCAP_AF_DP_ENA: W1, sparx5: is0 + * VCAP_AF_DLR_SEL: W2, lan966x: is1 + * 0: No changes to port-based selection in ANA:PORT:OAM_CFG.DLR_ENA. 1: Enable + * DLR frame processing 2: Disable DLR processing + * VCAP_AF_DP_ENA: W1, sparx5: is0, lan966x: is1 * If set, use DP_VAL as classified drop precedence level. Otherwise, drop * precedence level from basic classification is used. - * VCAP_AF_DP_VAL: W2, sparx5: is0 + * VCAP_AF_DP_VAL: sparx5 is0 W2, lan966x is1 W1 * See DP_ENA. - * VCAP_AF_DSCP_ENA: W1, sparx5: is0 + * VCAP_AF_DSCP_ENA: W1, sparx5: is0, lan966x: is1 * If set, use DSCP_VAL as classified DSCP value. Otherwise, DSCP value from * basic classification is used. * VCAP_AF_DSCP_SEL: W3, sparx5: es0 @@ -495,7 +551,7 @@ enum vcap_actionfield_set { * table 0, otherwise use DSCP_VAL. 5: Mapped using mapping table 1, otherwise * use mapping table 0. 6: Mapped using mapping table 2, otherwise use DSCP_VAL. * 7: Mapped using mapping table 3, otherwise use mapping table 2 - * VCAP_AF_DSCP_VAL: W6, sparx5: is0/es0 + * VCAP_AF_DSCP_VAL: W6, sparx5: is0/es0, lan966x: is1 * See DSCP_ENA. * VCAP_AF_ES2_REW_CMD: W3, sparx5: es2 * Command forwarded to REW: 0: No action. 1: SWAP MAC addresses. 2: Do L2CP @@ -529,9 +585,16 @@ enum vcap_actionfield_set { * VCAP_AF_ISDX_ADD_REPLACE_SEL: W1, sparx5: is0 * Controls the classified ISDX. 0: New ISDX = old ISDX + ISDX_VAL. 1: New ISDX * = ISDX_VAL. + * VCAP_AF_ISDX_ADD_VAL: W8, lan966x: is1 + * If ISDX_REPLACE_ENA is set, ISDX_ADD_VAL is used directly as the new ISDX. + * Encoding: ISDX_REPLACE_ENA=0, ISDX_ADD_VAL=0: Disabled ISDX_EPLACE_ENA=0, + * ISDX_ADD_VAL>0: Add value to classified ISDX. ISDX_REPLACE_ENA=1: Replace + * with ISDX_ADD_VAL value. * VCAP_AF_ISDX_ENA: W1, lan966x: is2 * Setting this bit to 1 causes the classified ISDX to be set to the value of * POLICE_IDX[8:0]. + * VCAP_AF_ISDX_REPLACE_ENA: W1, lan966x: is1 + * If set, classified ISDX is set to ISDX_ADD_VAL. * VCAP_AF_ISDX_VAL: W12, sparx5: is0 * See isdx_add_replace_sel * VCAP_AF_LOOP_ENA: W1, sparx5: es0 @@ -572,14 +635,22 @@ enum vcap_actionfield_set { * VCAP_AF_MIRROR_PROBE_ID: W2, sparx5: es2 * Signals a mirror probe to be placed in the IFH. Only possible when FWD_MODE * is copy. 0: No mirroring. 1-3: Use mirror probe 0-2. + * VCAP_AF_MRP_SEL: W2, lan966x: is1 + * 0: No changes to port-based selection in ANA:PORT:OAM_CFG.MRP_ENA. 1: Enable + * MRP frame processing 2: Disable MRP processing * VCAP_AF_NXT_IDX: W12, sparx5: is0 * Index used as part of key (field G_IDX) in the next lookup. 
* VCAP_AF_NXT_IDX_CTRL: W3, sparx5: is0 * Controls the generation of the G_IDX used in the VCAP CLM next lookup - * VCAP_AF_PAG_OVERRIDE_MASK: W8, sparx5: is0 + * VCAP_AF_OAM_SEL: W3, lan966x: is1 + * 0: No changes to port-based selection in ANA:PORT:OAM_CFG.OAM_CFG 1: Enable + * OAM frame processing for untagged frames 2: Enable OAM frame processing for + * single frames 3: Enable OAM frame processing for double frames 4: Disable OAM + * frame processing + * VCAP_AF_PAG_OVERRIDE_MASK: W8, sparx5: is0, lan966x: is1 * Bits set in this mask will override PAG_VAL from port profile. New PAG = (PAG * (input) AND ~PAG_OVERRIDE_MASK) OR (PAG_VAL AND PAG_OVERRIDE_MASK) - * VCAP_AF_PAG_VAL: W8, sparx5: is0 + * VCAP_AF_PAG_VAL: W8, sparx5: is0, lan966x: is1 * See PAG_OVERRIDE_MASK. * VCAP_AF_PCP_A_VAL: W3, sparx5: es0 * PCP used in ES0 tag A. See TAG_A_PCP_SEL. @@ -587,10 +658,10 @@ enum vcap_actionfield_set { * PCP used in ES0 tag B. See TAG_B_PCP_SEL. * VCAP_AF_PCP_C_VAL: W3, sparx5: es0 * PCP used in ES0 tag C. See TAG_C_PCP_SEL. - * VCAP_AF_PCP_ENA: W1, sparx5: is0 + * VCAP_AF_PCP_ENA: W1, sparx5: is0, lan966x: is1 * If set, use PCP_VAL as classified PCP value. Otherwise, PCP from basic * classification is used. - * VCAP_AF_PCP_VAL: W3, sparx5: is0 + * VCAP_AF_PCP_VAL: W3, sparx5: is0, lan966x: is1 * See PCP_ENA. * VCAP_AF_PIPELINE_ACT: W1, sparx5: es0 * Pipeline action when FWD_SEL > 0. 0: XTR. CPU_QU selects CPU extraction queue @@ -600,11 +671,11 @@ enum vcap_actionfield_set { * PIPELINE_PT == NONE. Overrules previous settings of pipeline point. * VCAP_AF_PIPELINE_PT: sparx5 is2 W5, sparx5 es0 W2 * Pipeline point used if PIPELINE_FORCE_ENA is set - * VCAP_AF_POLICE_ENA: W1, sparx5: is2/es2, lan966x: is2 - * Setting this bit to 1 causes frames that hit this action to be policed by the - * ACL policer specified in POLICE_IDX. Only applies to the first lookup. - * VCAP_AF_POLICE_IDX: sparx5 is2 W6, sparx5 es2 W6, lan966x is2 W9 - * Selects VCAP policer used when policing frames (POLICE_ENA) + * VCAP_AF_POLICE_ENA: W1, sparx5: is2/es2, lan966x: is1/is2 + * If set, POLICE_IDX is used to lookup ANA::POL. + * VCAP_AF_POLICE_IDX: sparx5 is2 W6, sparx5 es2 W6, lan966x is1 W9, lan966x is2 + * W9 + * Policer index. * VCAP_AF_POLICE_REMARK: W1, sparx5: es2 * If set, frames exceeding policer rates are marked as yellow but not * discarded. @@ -628,16 +699,24 @@ enum vcap_actionfield_set { * port. 1: ES0 tag A: Push ES0 tag A. No port tag. 2: Force port tag: Always * push port tag. No ES0 tag A. 3: Force untag: Never push port tag or ES0 tag * A. - * VCAP_AF_QOS_ENA: W1, sparx5: is0 + * VCAP_AF_QOS_ENA: W1, sparx5: is0, lan966x: is1 * If set, use QOS_VAL as classified QoS class. Otherwise, QoS class from basic * classification is used. - * VCAP_AF_QOS_VAL: W3, sparx5: is0 + * VCAP_AF_QOS_VAL: W3, sparx5: is0, lan966x: is1 * See QOS_ENA. * VCAP_AF_REW_OP: W16, lan966x: is2 * Rewriter operation command. * VCAP_AF_RT_DIS: W1, sparx5: is2 * If set, routing is disallowed. Only applies when IS_INNER_ACL is 0. See also * IGR_ACL_ENA, EGR_ACL_ENA, and RLEG_STAT_IDX. + * VCAP_AF_SFID_ENA: W1, lan966x: is1 + * If set, SFID_VAL is used to lookup ANA::SFID. + * VCAP_AF_SFID_VAL: W8, lan966x: is1 + * Stream filter identifier. + * VCAP_AF_SGID_ENA: W1, lan966x: is1 + * If set, SGID_VAL is used to lookup ANA::SGID. + * VCAP_AF_SGID_VAL: W8, lan966x: is1 + * Stream gate identifier. * VCAP_AF_SWAP_MACS_ENA: W1, sparx5: es0 * This setting is only active when FWD_SEL = 1 or FWD_SEL = 2 and PIPELINE_ACT * = LBK_ASM. 
0: No action. 1: Swap MACs and clear bit 40 in new SMAC. @@ -686,7 +765,7 @@ enum vcap_actionfield_set { * VCAP_AF_TAG_C_VID_SEL: W2, sparx5: es0 * Selects VID for ES0 tag C. The resulting VID is termed C-TAG.VID. 0: * Classified VID. 1: VID_C_VAL. 2: IFH.ENCAP.GVID. 3: Reserved. - * VCAP_AF_TYPE: W1, sparx5: is0 + * VCAP_AF_TYPE: W1, sparx5: is0, lan966x: is1 * Actionset type id - Set by the API * VCAP_AF_UNTAG_VID_ENA: W1, sparx5: es0 * Controls insertion of tag C. Untag or insert mode can be selected. See @@ -697,8 +776,19 @@ enum vcap_actionfield_set { * VID used in ES0 tag B. See TAG_B_VID_SEL. * VCAP_AF_VID_C_VAL: W12, sparx5: es0 * VID used in ES0 tag C. See TAG_C_VID_SEL. - * VCAP_AF_VID_VAL: W13, sparx5: is0 + * VCAP_AF_VID_REPLACE_ENA: W1, lan966x: is1 + * Controls the classified VID: VID_REPLACE_ENA=0: Add VID_ADD_VAL to basic + * classified VID and use result as new classified VID. VID_REPLACE_ENA = 1: + * Replace basic classified VID with VID_VAL value and use as new classified + * VID. + * VCAP_AF_VID_VAL: sparx5 is0 W13, lan966x is1 W12 * New VID Value + * VCAP_AF_VLAN_POP_CNT: W2, lan966x: is1 + * See VLAN_POP_CNT_ENA + * VCAP_AF_VLAN_POP_CNT_ENA: W1, lan966x: is1 + * If set, use VLAN_POP_CNT as the number of VLAN tags to pop from the incoming + * frame. This number is used by the Rewriter. Otherwise, VLAN_POP_CNT from + * ANA:PORT:VLAN_CFG.VLAN_POP_CNT is used */ /* Actionfield names */ @@ -712,11 +802,13 @@ enum vcap_action_field { VCAP_AF_CPU_COPY_ENA, VCAP_AF_CPU_QU, VCAP_AF_CPU_QUEUE_NUM, + VCAP_AF_CUSTOM_ACE_TYPE_ENA, VCAP_AF_DEI_A_VAL, VCAP_AF_DEI_B_VAL, VCAP_AF_DEI_C_VAL, VCAP_AF_DEI_ENA, VCAP_AF_DEI_VAL, + VCAP_AF_DLR_SEL, VCAP_AF_DP_ENA, VCAP_AF_DP_VAL, VCAP_AF_DSCP_ENA, @@ -732,7 +824,9 @@ enum vcap_action_field { VCAP_AF_IGNORE_PIPELINE_CTRL, VCAP_AF_INTR_ENA, VCAP_AF_ISDX_ADD_REPLACE_SEL, + VCAP_AF_ISDX_ADD_VAL, VCAP_AF_ISDX_ENA, + VCAP_AF_ISDX_REPLACE_ENA, VCAP_AF_ISDX_VAL, VCAP_AF_LOOP_ENA, VCAP_AF_LRN_DIS, @@ -745,8 +839,10 @@ enum vcap_action_field { VCAP_AF_MIRROR_ENA, VCAP_AF_MIRROR_PROBE, VCAP_AF_MIRROR_PROBE_ID, + VCAP_AF_MRP_SEL, VCAP_AF_NXT_IDX, VCAP_AF_NXT_IDX_CTRL, + VCAP_AF_OAM_SEL, VCAP_AF_PAG_OVERRIDE_MASK, VCAP_AF_PAG_VAL, VCAP_AF_PCP_A_VAL, @@ -770,6 +866,10 @@ enum vcap_action_field { VCAP_AF_QOS_VAL, VCAP_AF_REW_OP, VCAP_AF_RT_DIS, + VCAP_AF_SFID_ENA, + VCAP_AF_SFID_VAL, + VCAP_AF_SGID_ENA, + VCAP_AF_SGID_VAL, VCAP_AF_SWAP_MACS_ENA, VCAP_AF_TAG_A_DEI_SEL, VCAP_AF_TAG_A_PCP_SEL, @@ -788,7 +888,10 @@ enum vcap_action_field { VCAP_AF_VID_A_VAL, VCAP_AF_VID_B_VAL, VCAP_AF_VID_C_VAL, + VCAP_AF_VID_REPLACE_ENA, VCAP_AF_VID_VAL, + VCAP_AF_VLAN_POP_CNT, + VCAP_AF_VLAN_POP_CNT_ENA, }; #endif /* __VCAP_AG_API__ */ diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_debugfs_kunit.c b/drivers/net/ethernet/microchip/vcap/vcap_api_debugfs_kunit.c index 0de3f677135a8..b23c11b0647c9 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_api_debugfs_kunit.c +++ b/drivers/net/ethernet/microchip/vcap/vcap_api_debugfs_kunit.c @@ -387,7 +387,7 @@ static const char * const test_admin_info_expect[] = { "default_cnt: 73\n", "require_cnt_dis: 0\n", "version: 1\n", - "vtype: 3\n", + "vtype: 4\n", "vinst: 0\n", "ingress: 1\n", "first_cid: 10000\n", @@ -435,7 +435,7 @@ static const char * const test_admin_expect[] = { "default_cnt: 73\n", "require_cnt_dis: 0\n", "version: 1\n", - "vtype: 3\n", + "vtype: 4\n", "vinst: 0\n", "ingress: 1\n", "first_cid: 8000000\n", -- GitLab From a4d9b3ec63de3c36890fb440980ebf8f642bab88 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur 
Date: Tue, 7 Mar 2023 23:09:26 +0100 Subject: [PATCH 0344/2078] net: lan966x: Add IS1 VCAP keyset configuration for lan966x Add IS1 VCAP port keyset configuration for lan966x and also update debug fs support to show the keyset configuration. Signed-off-by: Horatiu Vultur Signed-off-by: Jakub Kicinski --- .../ethernet/microchip/lan966x/lan966x_main.h | 38 ++++ .../ethernet/microchip/lan966x/lan966x_regs.h | 36 ++++ .../microchip/lan966x/lan966x_vcap_debugfs.c | 133 +++++++++++- .../microchip/lan966x/lan966x_vcap_impl.c | 192 ++++++++++++++++-- 4 files changed, 383 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h index 49f5159afbf30..cbdae0ab8bb6d 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h @@ -92,6 +92,11 @@ #define SE_IDX_QUEUE 0 /* 0-79 : Queue scheduler elements */ #define SE_IDX_PORT 80 /* 80-89 : Port schedular elements */ +#define LAN966X_VCAP_CID_IS1_L0 VCAP_CID_INGRESS_L0 /* IS1 lookup 0 */ +#define LAN966X_VCAP_CID_IS1_L1 VCAP_CID_INGRESS_L1 /* IS1 lookup 1 */ +#define LAN966X_VCAP_CID_IS1_L2 VCAP_CID_INGRESS_L2 /* IS1 lookup 2 */ +#define LAN966X_VCAP_CID_IS1_MAX (VCAP_CID_INGRESS_L3 - 1) /* IS1 Max */ + #define LAN966X_VCAP_CID_IS2_L0 VCAP_CID_INGRESS_STAGE2_L0 /* IS2 lookup 0 */ #define LAN966X_VCAP_CID_IS2_L1 VCAP_CID_INGRESS_STAGE2_L1 /* IS2 lookup 1 */ #define LAN966X_VCAP_CID_IS2_MAX (VCAP_CID_INGRESS_STAGE2_L2 - 1) /* IS2 Max */ @@ -139,6 +144,39 @@ enum vcap_is2_port_sel_ipv6 { VCAP_IS2_PS_IPV6_MAC_ETYPE, }; +enum vcap_is1_port_sel_other { + VCAP_IS1_PS_OTHER_NORMAL, + VCAP_IS1_PS_OTHER_7TUPLE, + VCAP_IS1_PS_OTHER_DBL_VID, + VCAP_IS1_PS_OTHER_DMAC_VID, +}; + +enum vcap_is1_port_sel_ipv4 { + VCAP_IS1_PS_IPV4_NORMAL, + VCAP_IS1_PS_IPV4_7TUPLE, + VCAP_IS1_PS_IPV4_5TUPLE_IP4, + VCAP_IS1_PS_IPV4_DBL_VID, + VCAP_IS1_PS_IPV4_DMAC_VID, +}; + +enum vcap_is1_port_sel_ipv6 { + VCAP_IS1_PS_IPV6_NORMAL, + VCAP_IS1_PS_IPV6_7TUPLE, + VCAP_IS1_PS_IPV6_5TUPLE_IP4, + VCAP_IS1_PS_IPV6_NORMAL_IP6, + VCAP_IS1_PS_IPV6_5TUPLE_IP6, + VCAP_IS1_PS_IPV6_DBL_VID, + VCAP_IS1_PS_IPV6_DMAC_VID, +}; + +enum vcap_is1_port_sel_rt { + VCAP_IS1_PS_RT_NORMAL, + VCAP_IS1_PS_RT_7TUPLE, + VCAP_IS1_PS_RT_DBL_VID, + VCAP_IS1_PS_RT_DMAC_VID, + VCAP_IS1_PS_RT_FOLLOW_OTHER = 7, +}; + struct lan966x_port; struct lan966x_db { diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h index 9767b5a1c9580..f99f88b5caa88 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h @@ -316,6 +316,42 @@ enum lan966x_target { #define ANA_DROP_CFG_DROP_MC_SMAC_ENA_GET(x)\ FIELD_GET(ANA_DROP_CFG_DROP_MC_SMAC_ENA, x) +/* ANA:PORT:VCAP_CFG */ +#define ANA_VCAP_CFG(g) __REG(TARGET_ANA, 0, 1, 28672, g, 9, 128, 12, 0, 1, 4) + +#define ANA_VCAP_CFG_S1_ENA BIT(14) +#define ANA_VCAP_CFG_S1_ENA_SET(x)\ + FIELD_PREP(ANA_VCAP_CFG_S1_ENA, x) +#define ANA_VCAP_CFG_S1_ENA_GET(x)\ + FIELD_GET(ANA_VCAP_CFG_S1_ENA, x) + +/* ANA:PORT:VCAP_S1_KEY_CFG */ +#define ANA_VCAP_S1_CFG(g, r) __REG(TARGET_ANA, 0, 1, 28672, g, 9, 128, 16, r, 3, 4) + +#define ANA_VCAP_S1_CFG_KEY_RT_CFG GENMASK(11, 9) +#define ANA_VCAP_S1_CFG_KEY_RT_CFG_SET(x)\ + FIELD_PREP(ANA_VCAP_S1_CFG_KEY_RT_CFG, x) +#define ANA_VCAP_S1_CFG_KEY_RT_CFG_GET(x)\ + FIELD_GET(ANA_VCAP_S1_CFG_KEY_RT_CFG, x) + +#define ANA_VCAP_S1_CFG_KEY_IP6_CFG GENMASK(8, 6) +#define 
ANA_VCAP_S1_CFG_KEY_IP6_CFG_SET(x)\ + FIELD_PREP(ANA_VCAP_S1_CFG_KEY_IP6_CFG, x) +#define ANA_VCAP_S1_CFG_KEY_IP6_CFG_GET(x)\ + FIELD_GET(ANA_VCAP_S1_CFG_KEY_IP6_CFG, x) + +#define ANA_VCAP_S1_CFG_KEY_IP4_CFG GENMASK(5, 3) +#define ANA_VCAP_S1_CFG_KEY_IP4_CFG_SET(x)\ + FIELD_PREP(ANA_VCAP_S1_CFG_KEY_IP4_CFG, x) +#define ANA_VCAP_S1_CFG_KEY_IP4_CFG_GET(x)\ + FIELD_GET(ANA_VCAP_S1_CFG_KEY_IP4_CFG, x) + +#define ANA_VCAP_S1_CFG_KEY_OTHER_CFG GENMASK(2, 0) +#define ANA_VCAP_S1_CFG_KEY_OTHER_CFG_SET(x)\ + FIELD_PREP(ANA_VCAP_S1_CFG_KEY_OTHER_CFG, x) +#define ANA_VCAP_S1_CFG_KEY_OTHER_CFG_GET(x)\ + FIELD_GET(ANA_VCAP_S1_CFG_KEY_OTHER_CFG, x) + /* ANA:PORT:VCAP_S2_CFG */ #define ANA_VCAP_S2_CFG(g) __REG(TARGET_ANA, 0, 1, 28672, g, 9, 128, 28, 0, 1, 4) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_debugfs.c b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_debugfs.c index 7a0db58f55136..d90c08cfcf142 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_debugfs.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_debugfs.c @@ -5,9 +5,124 @@ #include "vcap_api.h" #include "vcap_api_client.h" -static void lan966x_vcap_port_keys(struct lan966x_port *port, - struct vcap_admin *admin, - struct vcap_output_print *out) +static void lan966x_vcap_is1_port_keys(struct lan966x_port *port, + struct vcap_admin *admin, + struct vcap_output_print *out) +{ + struct lan966x *lan966x = port->lan966x; + u32 val; + + out->prf(out->dst, " port[%d] (%s): ", port->chip_port, + netdev_name(port->dev)); + + val = lan_rd(lan966x, ANA_VCAP_CFG(port->chip_port)); + out->prf(out->dst, "\n state: "); + if (ANA_VCAP_CFG_S1_ENA_GET(val)) + out->prf(out->dst, "on"); + else + out->prf(out->dst, "off"); + + for (int l = 0; l < admin->lookups; ++l) { + out->prf(out->dst, "\n Lookup %d: ", l); + + out->prf(out->dst, "\n other: "); + switch (ANA_VCAP_S1_CFG_KEY_OTHER_CFG_GET(val)) { + case VCAP_IS1_PS_OTHER_NORMAL: + out->prf(out->dst, "normal"); + break; + case VCAP_IS1_PS_OTHER_7TUPLE: + out->prf(out->dst, "7tuple"); + break; + case VCAP_IS1_PS_OTHER_DBL_VID: + out->prf(out->dst, "dbl_vid"); + break; + case VCAP_IS1_PS_OTHER_DMAC_VID: + out->prf(out->dst, "dmac_vid"); + break; + default: + out->prf(out->dst, "-"); + break; + } + + out->prf(out->dst, "\n ipv4: "); + switch (ANA_VCAP_S1_CFG_KEY_IP4_CFG_GET(val)) { + case VCAP_IS1_PS_IPV4_NORMAL: + out->prf(out->dst, "normal"); + break; + case VCAP_IS1_PS_IPV4_7TUPLE: + out->prf(out->dst, "7tuple"); + break; + case VCAP_IS1_PS_IPV4_5TUPLE_IP4: + out->prf(out->dst, "5tuple_ipv4"); + break; + case VCAP_IS1_PS_IPV4_DBL_VID: + out->prf(out->dst, "dbl_vid"); + break; + case VCAP_IS1_PS_IPV4_DMAC_VID: + out->prf(out->dst, "dmac_vid"); + break; + default: + out->prf(out->dst, "-"); + break; + } + + out->prf(out->dst, "\n ipv6: "); + switch (ANA_VCAP_S1_CFG_KEY_IP6_CFG_GET(val)) { + case VCAP_IS1_PS_IPV6_NORMAL: + out->prf(out->dst, "normal"); + break; + case VCAP_IS1_PS_IPV6_7TUPLE: + out->prf(out->dst, "7tuple"); + break; + case VCAP_IS1_PS_IPV6_5TUPLE_IP4: + out->prf(out->dst, "5tuple_ip4"); + break; + case VCAP_IS1_PS_IPV6_NORMAL_IP6: + out->prf(out->dst, "normal_ip6"); + break; + case VCAP_IS1_PS_IPV6_5TUPLE_IP6: + out->prf(out->dst, "5tuple_ip6"); + break; + case VCAP_IS1_PS_IPV6_DBL_VID: + out->prf(out->dst, "dbl_vid"); + break; + case VCAP_IS1_PS_IPV6_DMAC_VID: + out->prf(out->dst, "dmac_vid"); + break; + default: + out->prf(out->dst, "-"); + break; + } + + out->prf(out->dst, "\n rt: "); + switch (ANA_VCAP_S1_CFG_KEY_RT_CFG_GET(val)) { + case 
VCAP_IS1_PS_RT_NORMAL: + out->prf(out->dst, "normal"); + break; + case VCAP_IS1_PS_RT_7TUPLE: + out->prf(out->dst, "7tuple"); + break; + case VCAP_IS1_PS_RT_DBL_VID: + out->prf(out->dst, "dbl_vid"); + break; + case VCAP_IS1_PS_RT_DMAC_VID: + out->prf(out->dst, "dmac_vid"); + break; + case VCAP_IS1_PS_RT_FOLLOW_OTHER: + out->prf(out->dst, "follow_other"); + break; + default: + out->prf(out->dst, "-"); + break; + } + } + + out->prf(out->dst, "\n"); +} + +static void lan966x_vcap_is2_port_keys(struct lan966x_port *port, + struct vcap_admin *admin, + struct vcap_output_print *out) { struct lan966x *lan966x = port->lan966x; u32 val; @@ -88,7 +203,17 @@ int lan966x_vcap_port_info(struct net_device *dev, vcap = &vctrl->vcaps[admin->vtype]; out->prf(out->dst, "%s:\n", vcap->name); - lan966x_vcap_port_keys(port, admin, out); + switch (admin->vtype) { + case VCAP_TYPE_IS2: + lan966x_vcap_is2_port_keys(port, admin, out); + break; + case VCAP_TYPE_IS1: + lan966x_vcap_is1_port_keys(port, admin, out); + break; + default: + out->prf(out->dst, " no info\n"); + break; + } return 0; } diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c index 68f9d69fd37b6..7ea8e86336091 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c @@ -8,6 +8,7 @@ #define STREAMSIZE (64 * 4) +#define LAN966X_IS1_LOOKUPS 3 #define LAN966X_IS2_LOOKUPS 2 static struct lan966x_vcap_inst { @@ -19,6 +20,15 @@ static struct lan966x_vcap_inst { int count; /* number of available addresses */ bool ingress; /* is vcap in the ingress path */ } lan966x_vcap_inst_cfg[] = { + { + .vtype = VCAP_TYPE_IS1, /* IS1-0 */ + .tgt_inst = 1, + .lookups = LAN966X_IS1_LOOKUPS, + .first_cid = LAN966X_VCAP_CID_IS1_L0, + .last_cid = LAN966X_VCAP_CID_IS1_MAX, + .count = 768, + .ingress = true, + }, { .vtype = VCAP_TYPE_IS2, /* IS2-0 */ .tgt_inst = 2, @@ -72,7 +82,21 @@ static void __lan966x_vcap_range_init(struct lan966x *lan966x, lan966x_vcap_wait_update(lan966x, admin->tgt_inst); } -static int lan966x_vcap_cid_to_lookup(int cid) +static int lan966x_vcap_is1_cid_to_lookup(int cid) +{ + int lookup = 0; + + if (cid >= LAN966X_VCAP_CID_IS1_L1 && + cid < LAN966X_VCAP_CID_IS1_L2) + lookup = 1; + else if (cid >= LAN966X_VCAP_CID_IS1_L2 && + cid < LAN966X_VCAP_CID_IS1_MAX) + lookup = 2; + + return lookup; +} + +static int lan966x_vcap_is2_cid_to_lookup(int cid) { if (cid >= LAN966X_VCAP_CID_IS2_L1 && cid < LAN966X_VCAP_CID_IS2_MAX) @@ -81,6 +105,67 @@ static int lan966x_vcap_cid_to_lookup(int cid) return 0; } +/* Return the list of keysets for the vcap port configuration */ +static int +lan966x_vcap_is1_get_port_keysets(struct net_device *ndev, int lookup, + struct vcap_keyset_list *keysetlist, + u16 l3_proto) +{ + struct lan966x_port *port = netdev_priv(ndev); + struct lan966x *lan966x = port->lan966x; + u32 val; + + val = lan_rd(lan966x, ANA_VCAP_S1_CFG(port->chip_port, lookup)); + + /* Collect all keysets for the port in a list */ + if (l3_proto == ETH_P_ALL || l3_proto == ETH_P_IP) { + switch (ANA_VCAP_S1_CFG_KEY_IP4_CFG_GET(val)) { + case VCAP_IS1_PS_IPV4_7TUPLE: + vcap_keyset_list_add(keysetlist, VCAP_KFS_7TUPLE); + break; + case VCAP_IS1_PS_IPV4_5TUPLE_IP4: + vcap_keyset_list_add(keysetlist, VCAP_KFS_5TUPLE_IP4); + break; + case VCAP_IS1_PS_IPV4_NORMAL: + vcap_keyset_list_add(keysetlist, VCAP_KFS_NORMAL); + break; + } + } + + if (l3_proto == ETH_P_ALL || l3_proto == ETH_P_IPV6) { + switch 
(ANA_VCAP_S1_CFG_KEY_IP6_CFG_GET(val)) { + case VCAP_IS1_PS_IPV6_NORMAL: + case VCAP_IS1_PS_IPV6_NORMAL_IP6: + vcap_keyset_list_add(keysetlist, VCAP_KFS_NORMAL); + vcap_keyset_list_add(keysetlist, VCAP_KFS_NORMAL_IP6); + break; + case VCAP_IS1_PS_IPV6_5TUPLE_IP6: + vcap_keyset_list_add(keysetlist, VCAP_KFS_5TUPLE_IP6); + break; + case VCAP_IS1_PS_IPV6_7TUPLE: + vcap_keyset_list_add(keysetlist, VCAP_KFS_7TUPLE); + break; + case VCAP_IS1_PS_IPV6_5TUPLE_IP4: + vcap_keyset_list_add(keysetlist, VCAP_KFS_5TUPLE_IP4); + break; + case VCAP_IS1_PS_IPV6_DMAC_VID: + vcap_keyset_list_add(keysetlist, VCAP_KFS_DMAC_VID); + break; + } + } + + switch (ANA_VCAP_S1_CFG_KEY_OTHER_CFG_GET(val)) { + case VCAP_IS1_PS_OTHER_7TUPLE: + vcap_keyset_list_add(keysetlist, VCAP_KFS_7TUPLE); + break; + case VCAP_IS1_PS_OTHER_NORMAL: + vcap_keyset_list_add(keysetlist, VCAP_KFS_NORMAL); + break; + } + + return 0; +} + static int lan966x_vcap_is2_get_port_keysets(struct net_device *dev, int lookup, struct vcap_keyset_list *keysetlist, @@ -180,11 +265,26 @@ lan966x_vcap_validate_keyset(struct net_device *dev, if (!kslist || kslist->cnt == 0) return VCAP_KFS_NO_VALUE; - lookup = lan966x_vcap_cid_to_lookup(rule->vcap_chain_id); keysetlist.max = ARRAY_SIZE(keysets); keysetlist.keysets = keysets; - err = lan966x_vcap_is2_get_port_keysets(dev, lookup, &keysetlist, - l3_proto); + + switch (admin->vtype) { + case VCAP_TYPE_IS1: + lookup = lan966x_vcap_is1_cid_to_lookup(rule->vcap_chain_id); + err = lan966x_vcap_is1_get_port_keysets(dev, lookup, &keysetlist, + l3_proto); + break; + case VCAP_TYPE_IS2: + lookup = lan966x_vcap_is2_cid_to_lookup(rule->vcap_chain_id); + err = lan966x_vcap_is2_get_port_keysets(dev, lookup, &keysetlist, + l3_proto); + break; + default: + pr_err("vcap type: %s not supported\n", + lan966x_vcaps[admin->vtype].name); + return VCAP_KFS_NO_VALUE; + } + if (err) return VCAP_KFS_NO_VALUE; @@ -197,17 +297,32 @@ lan966x_vcap_validate_keyset(struct net_device *dev, return VCAP_KFS_NO_VALUE; } -static bool lan966x_vcap_is_first_chain(struct vcap_rule *rule) +static bool lan966x_vcap_is2_is_first_chain(struct vcap_rule *rule) { return (rule->vcap_chain_id >= LAN966X_VCAP_CID_IS2_L0 && rule->vcap_chain_id < LAN966X_VCAP_CID_IS2_L1); } -static void lan966x_vcap_add_default_fields(struct net_device *dev, - struct vcap_admin *admin, - struct vcap_rule *rule) +static void lan966x_vcap_is1_add_default_fields(struct lan966x_port *port, + struct vcap_admin *admin, + struct vcap_rule *rule) +{ + u32 value, mask; + u32 lookup; + + if (vcap_rule_get_key_u32(rule, VCAP_KF_IF_IGR_PORT_MASK, + &value, &mask)) + vcap_rule_add_key_u32(rule, VCAP_KF_IF_IGR_PORT_MASK, 0, + ~BIT(port->chip_port)); + + lookup = lan966x_vcap_is1_cid_to_lookup(rule->vcap_chain_id); + vcap_rule_add_key_u32(rule, VCAP_KF_LOOKUP_INDEX, lookup, 0x3); +} + +static void lan966x_vcap_is2_add_default_fields(struct lan966x_port *port, + struct vcap_admin *admin, + struct vcap_rule *rule) { - struct lan966x_port *port = netdev_priv(dev); u32 value, mask; if (vcap_rule_get_key_u32(rule, VCAP_KF_IF_IGR_PORT_MASK, @@ -215,7 +330,7 @@ static void lan966x_vcap_add_default_fields(struct net_device *dev, vcap_rule_add_key_u32(rule, VCAP_KF_IF_IGR_PORT_MASK, 0, ~BIT(port->chip_port)); - if (lan966x_vcap_is_first_chain(rule)) + if (lan966x_vcap_is2_is_first_chain(rule)) vcap_rule_add_key_bit(rule, VCAP_KF_LOOKUP_FIRST_IS, VCAP_BIT_1); else @@ -223,6 +338,26 @@ static void lan966x_vcap_add_default_fields(struct net_device *dev, VCAP_BIT_0); } +static void 
lan966x_vcap_add_default_fields(struct net_device *dev, + struct vcap_admin *admin, + struct vcap_rule *rule) +{ + struct lan966x_port *port = netdev_priv(dev); + + switch (admin->vtype) { + case VCAP_TYPE_IS1: + lan966x_vcap_is1_add_default_fields(port, admin, rule); + break; + case VCAP_TYPE_IS2: + lan966x_vcap_is2_add_default_fields(port, admin, rule); + break; + default: + pr_err("vcap type: %s not supported\n", + lan966x_vcaps[admin->vtype].name); + break; + } +} + static void lan966x_vcap_cache_erase(struct vcap_admin *admin) { memset(admin->cache.keystream, 0, STREAMSIZE); @@ -464,8 +599,37 @@ static void lan966x_vcap_block_init(struct lan966x *lan966x, static void lan966x_vcap_port_key_deselection(struct lan966x *lan966x, struct vcap_admin *admin) { - for (int p = 0; p < lan966x->num_phys_ports; ++p) - lan_wr(0, lan966x, ANA_VCAP_S2_CFG(p)); + u32 val; + + switch (admin->vtype) { + case VCAP_TYPE_IS1: + val = ANA_VCAP_S1_CFG_KEY_IP6_CFG_SET(VCAP_IS1_PS_IPV6_5TUPLE_IP6) | + ANA_VCAP_S1_CFG_KEY_IP4_CFG_SET(VCAP_IS1_PS_IPV4_5TUPLE_IP4) | + ANA_VCAP_S1_CFG_KEY_OTHER_CFG_SET(VCAP_IS1_PS_OTHER_NORMAL); + + for (int p = 0; p < lan966x->num_phys_ports; ++p) { + if (!lan966x->ports[p]) + continue; + + for (int l = 0; l < LAN966X_IS1_LOOKUPS; ++l) + lan_wr(val, lan966x, ANA_VCAP_S1_CFG(p, l)); + + lan_rmw(ANA_VCAP_CFG_S1_ENA_SET(true), + ANA_VCAP_CFG_S1_ENA, lan966x, + ANA_VCAP_CFG(p)); + } + + break; + case VCAP_TYPE_IS2: + for (int p = 0; p < lan966x->num_phys_ports; ++p) + lan_wr(0, lan966x, ANA_VCAP_S2_CFG(p)); + + break; + default: + pr_err("vcap type: %s not supported\n", + lan966x_vcaps[admin->vtype].name); + break; + } } int lan966x_vcap_init(struct lan966x *lan966x) @@ -506,6 +670,10 @@ int lan966x_vcap_init(struct lan966x *lan966x) lan_rmw(ANA_VCAP_S2_CFG_ENA_SET(true), ANA_VCAP_S2_CFG_ENA, lan966x, ANA_VCAP_S2_CFG(lan966x->ports[p]->chip_port)); + + lan_rmw(ANA_VCAP_CFG_S1_ENA_SET(true), + ANA_VCAP_CFG_S1_ENA, lan966x, + ANA_VCAP_CFG(lan966x->ports[p]->chip_port)); } } -- GitLab From 135c2116fd03642b74fa2c0cd1cbd2614ca3d80c Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 7 Mar 2023 23:09:27 +0100 Subject: [PATCH 0345/2078] net: lan966x: Add TC support for IS1 VCAP Enable TC command to use IS1 VCAP Signed-off-by: Horatiu Vultur Signed-off-by: Jakub Kicinski --- .../microchip/lan966x/lan966x_tc_flower.c | 105 +++++++++++++++++- 1 file changed, 102 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c b/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c index f960727ecaeec..8391652c1c45e 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c @@ -79,18 +79,61 @@ lan966x_tc_flower_handler_basic_usage(struct vcap_tc_flower_parse_usage *st) VCAP_BIT_1); if (err) goto out; + } else if (st->l3_proto == ETH_P_IPV6 && + st->admin->vtype == VCAP_TYPE_IS1) { + /* Don't set any keys in this case */ + } else if (st->l3_proto == ETH_P_SNAP && + st->admin->vtype == VCAP_TYPE_IS1) { + err = vcap_rule_add_key_bit(st->vrule, + VCAP_KF_ETYPE_LEN_IS, + VCAP_BIT_0); + if (err) + goto out; + + err = vcap_rule_add_key_bit(st->vrule, + VCAP_KF_IP_SNAP_IS, + VCAP_BIT_1); + if (err) + goto out; + } else if (st->admin->vtype == VCAP_TYPE_IS1) { + err = vcap_rule_add_key_bit(st->vrule, + VCAP_KF_ETYPE_LEN_IS, + VCAP_BIT_1); + if (err) + goto out; + + err = vcap_rule_add_key_u32(st->vrule, VCAP_KF_ETYPE, + st->l3_proto, ~0); + if (err) + goto out; } } if 
(match.mask->ip_proto) { st->l4_proto = match.key->ip_proto; if (st->l4_proto == IPPROTO_TCP) { + if (st->admin->vtype == VCAP_TYPE_IS1) { + err = vcap_rule_add_key_bit(st->vrule, + VCAP_KF_TCP_UDP_IS, + VCAP_BIT_1); + if (err) + goto out; + } + err = vcap_rule_add_key_bit(st->vrule, VCAP_KF_TCP_IS, VCAP_BIT_1); if (err) goto out; } else if (st->l4_proto == IPPROTO_UDP) { + if (st->admin->vtype == VCAP_TYPE_IS1) { + err = vcap_rule_add_key_bit(st->vrule, + VCAP_KF_TCP_UDP_IS, + VCAP_BIT_1); + if (err) + goto out; + } + err = vcap_rule_add_key_bit(st->vrule, VCAP_KF_TCP_IS, VCAP_BIT_0); @@ -112,12 +155,30 @@ out: return err; } +static int +lan966x_tc_flower_handler_cvlan_usage(struct vcap_tc_flower_parse_usage *st) +{ + if (st->admin->vtype != VCAP_TYPE_IS1) { + NL_SET_ERR_MSG_MOD(st->fco->common.extack, + "cvlan not supported in this VCAP"); + return -EINVAL; + } + + return vcap_tc_flower_handler_cvlan_usage(st); +} + static int lan966x_tc_flower_handler_vlan_usage(struct vcap_tc_flower_parse_usage *st) { - return vcap_tc_flower_handler_vlan_usage(st, - VCAP_KF_8021Q_VID_CLS, - VCAP_KF_8021Q_PCP_CLS); + enum vcap_key_field vid_key = VCAP_KF_8021Q_VID_CLS; + enum vcap_key_field pcp_key = VCAP_KF_8021Q_PCP_CLS; + + if (st->admin->vtype == VCAP_TYPE_IS1) { + vid_key = VCAP_KF_8021Q_VID0; + pcp_key = VCAP_KF_8021Q_PCP0; + } + + return vcap_tc_flower_handler_vlan_usage(st, vid_key, pcp_key); } static int @@ -128,6 +189,7 @@ static int [FLOW_DISSECTOR_KEY_CONTROL] = lan966x_tc_flower_handler_control_usage, [FLOW_DISSECTOR_KEY_PORTS] = vcap_tc_flower_handler_portnum_usage, [FLOW_DISSECTOR_KEY_BASIC] = lan966x_tc_flower_handler_basic_usage, + [FLOW_DISSECTOR_KEY_CVLAN] = lan966x_tc_flower_handler_cvlan_usage, [FLOW_DISSECTOR_KEY_VLAN] = lan966x_tc_flower_handler_vlan_usage, [FLOW_DISSECTOR_KEY_TCP] = vcap_tc_flower_handler_tcp_usage, [FLOW_DISSECTOR_KEY_ARP] = vcap_tc_flower_handler_arp_usage, @@ -143,6 +205,7 @@ static int lan966x_tc_flower_use_dissectors(struct flow_cls_offload *f, .fco = f, .vrule = vrule, .l3_proto = ETH_P_ALL, + .admin = admin, }; int err = 0; @@ -221,6 +284,31 @@ static int lan966x_tc_flower_action_check(struct vcap_control *vctrl, return 0; } +/* Add the actionset that is the default for the VCAP type */ +static int lan966x_tc_set_actionset(struct vcap_admin *admin, + struct vcap_rule *vrule) +{ + enum vcap_actionfield_set aset; + int err = 0; + + switch (admin->vtype) { + case VCAP_TYPE_IS1: + aset = VCAP_AFS_S1; + break; + case VCAP_TYPE_IS2: + aset = VCAP_AFS_BASE_TYPE; + break; + default: + return -EINVAL; + } + + /* Do not overwrite any current actionset */ + if (vrule->actionset == VCAP_AFS_NO_VALUE) + err = vcap_set_rule_set_actionset(vrule, aset); + + return err; +} + static int lan966x_tc_flower_add(struct lan966x_port *port, struct flow_cls_offload *f, struct vcap_admin *admin, @@ -253,6 +341,13 @@ static int lan966x_tc_flower_add(struct lan966x_port *port, flow_action_for_each(idx, act, &frule->action) { switch (act->id) { case FLOW_ACTION_TRAP: + if (admin->vtype != VCAP_TYPE_IS2) { + NL_SET_ERR_MSG_MOD(f->common.extack, + "Trap action not supported in this VCAP"); + err = -EOPNOTSUPP; + goto out; + } + err = vcap_rule_add_action_bit(vrule, VCAP_AF_CPU_COPY_ENA, VCAP_BIT_1); @@ -266,6 +361,10 @@ static int lan966x_tc_flower_add(struct lan966x_port *port, break; case FLOW_ACTION_GOTO: + err = lan966x_tc_set_actionset(admin, vrule); + if (err) + goto out; + break; default: NL_SET_ERR_MSG_MOD(f->common.extack, -- GitLab From b3762a9db39c40a2e508a8edb91217676a74f299 
Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 7 Mar 2023 23:09:28 +0100 Subject: [PATCH 0346/2078] net: lan966x: Add TC filter chaining support for IS1 and IS2 VCAPs Allow rules to be chained between IS1 VCAP and IS2 VCAP. Chaining between IS1 lookups or between IS2 lookups are not supported by the hardware. Signed-off-by: Horatiu Vultur Signed-off-by: Jakub Kicinski --- .../microchip/lan966x/lan966x_tc_flower.c | 80 +++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c b/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c index 8391652c1c45e..570ac28736e03 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c @@ -309,6 +309,75 @@ static int lan966x_tc_set_actionset(struct vcap_admin *admin, return err; } +static int lan966x_tc_add_rule_link_target(struct vcap_admin *admin, + struct vcap_rule *vrule, + int target_cid) +{ + int link_val = target_cid % VCAP_CID_LOOKUP_SIZE; + int err; + + if (!link_val) + return 0; + + switch (admin->vtype) { + case VCAP_TYPE_IS1: + /* Choose IS1 specific NXT_IDX key (for chaining rules from IS1) */ + err = vcap_rule_add_key_u32(vrule, VCAP_KF_LOOKUP_GEN_IDX_SEL, + 1, ~0); + if (err) + return err; + + return vcap_rule_add_key_u32(vrule, VCAP_KF_LOOKUP_GEN_IDX, + link_val, ~0); + case VCAP_TYPE_IS2: + /* Add IS2 specific PAG key (for chaining rules from IS1) */ + return vcap_rule_add_key_u32(vrule, VCAP_KF_LOOKUP_PAG, + link_val, ~0); + default: + break; + } + return 0; +} + +static int lan966x_tc_add_rule_link(struct vcap_control *vctrl, + struct vcap_admin *admin, + struct vcap_rule *vrule, + struct flow_cls_offload *f, + int to_cid) +{ + struct vcap_admin *to_admin = vcap_find_admin(vctrl, to_cid); + int diff, err = 0; + + if (!to_admin) { + NL_SET_ERR_MSG_MOD(f->common.extack, + "Unknown destination chain"); + return -EINVAL; + } + + diff = vcap_chain_offset(vctrl, f->common.chain_index, to_cid); + if (!diff) + return 0; + + /* Between IS1 and IS2 the PAG value is used */ + if (admin->vtype == VCAP_TYPE_IS1 && to_admin->vtype == VCAP_TYPE_IS2) { + /* This works for IS1->IS2 */ + err = vcap_rule_add_action_u32(vrule, VCAP_AF_PAG_VAL, diff); + if (err) + return err; + + err = vcap_rule_add_action_u32(vrule, VCAP_AF_PAG_OVERRIDE_MASK, + 0xff); + if (err) + return err; + } else { + NL_SET_ERR_MSG_MOD(f->common.extack, + "Unsupported chain destination"); + return -EOPNOTSUPP; + } + + return err; +} + static int lan966x_tc_flower_add(struct lan966x_port *port, struct flow_cls_offload *f, struct vcap_admin *admin, @@ -336,6 +405,11 @@ static int lan966x_tc_flower_add(struct lan966x_port *port, if (err) goto out; + err = lan966x_tc_add_rule_link_target(admin, vrule, + f->common.chain_index); + if (err) + goto out; + frule = flow_cls_offload_flow_rule(f); flow_action_for_each(idx, act, &frule->action) { @@ -365,6 +439,12 @@ static int lan966x_tc_flower_add(struct lan966x_port *port, if (err) goto out; + err = lan966x_tc_add_rule_link(port->lan966x->vcap_ctrl, + admin, vrule, + f, act->chain_index); + if (err) + goto out; + break; default: NL_SET_ERR_MSG_MOD(f->common.extack, -- GitLab From 44d706fde75518781f6859febb0979cf5e7327d4 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 7 Mar 2023 23:09:29 +0100 Subject: [PATCH 0347/2078] net: lan966x: Add support for IS1 VCAP ethernet protocol types IS1 VCAP has it's own list of supported ethernet protocol types which is different than the IS2 
VCAP. Therefore separate the list of known protocol types based on the VCAP type. Signed-off-by: Horatiu Vultur Signed-off-by: Jakub Kicinski --- .../microchip/lan966x/lan966x_tc_flower.c | 36 ++++++++++++++----- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c b/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c index 570ac28736e03..47b2f7579dd23 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c @@ -5,14 +5,34 @@ #include "vcap_api_client.h" #include "vcap_tc.h" -static bool lan966x_tc_is_known_etype(u16 etype) +static bool lan966x_tc_is_known_etype(struct vcap_tc_flower_parse_usage *st, + u16 etype) { - switch (etype) { - case ETH_P_ALL: - case ETH_P_ARP: - case ETH_P_IP: - case ETH_P_IPV6: - return true; + switch (st->admin->vtype) { + case VCAP_TYPE_IS1: + switch (etype) { + case ETH_P_ALL: + case ETH_P_ARP: + case ETH_P_IP: + case ETH_P_IPV6: + return true; + } + break; + case VCAP_TYPE_IS2: + switch (etype) { + case ETH_P_ALL: + case ETH_P_ARP: + case ETH_P_IP: + case ETH_P_IPV6: + case ETH_P_SNAP: + case ETH_P_802_2: + return true; + } + break; + default: + NL_SET_ERR_MSG_MOD(st->fco->common.extack, + "VCAP type not supported"); + return false; } return false; @@ -69,7 +89,7 @@ lan966x_tc_flower_handler_basic_usage(struct vcap_tc_flower_parse_usage *st) flow_rule_match_basic(st->frule, &match); if (match.mask->n_proto) { st->l3_proto = be16_to_cpu(match.key->n_proto); - if (!lan966x_tc_is_known_etype(st->l3_proto)) { + if (!lan966x_tc_is_known_etype(st, st->l3_proto)) { err = vcap_rule_add_key_u32(st->vrule, VCAP_KF_ETYPE, st->l3_proto, ~0); if (err) -- GitLab From 438b406055cd21105aad77db7938ee4720b09bee Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 7 Mar 2023 20:45:56 -0700 Subject: [PATCH 0348/2078] tun: flag the device as supporting FMODE_NOWAIT tun already checks for both O_NONBLOCK and IOCB_NOWAIT in its read and write iter handlers, so it's fully ready for FMODE_NOWAIT. But for some reason it doesn't set it. Rectify that oversight. Signed-off-by: Jens Axboe Link: https://lore.kernel.org/r/3f7dc1f0-79ca-d85c-4d16-8c12c5bd492d@kernel.dk Signed-off-by: Jakub Kicinski --- drivers/net/tun.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index ad653b32b2f00..4c7f74904c257 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -3463,6 +3463,8 @@ static int tun_chr_open(struct inode *inode, struct file * file) sock_set_flag(&tfile->sk, SOCK_ZEROCOPY); + /* tun groks IOCB_NOWAIT just fine, mark it as such */ + file->f_mode |= FMODE_NOWAIT; return 0; } -- GitLab From f758bfec377ad2f15d7683473b1db1cfbf8e1bb0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 7 Mar 2023 21:18:21 -0700 Subject: [PATCH 0349/2078] tap: add support for IOCB_NOWAIT The tap driver already supports passing in nonblocking state based on O_NONBLOCK, add support for checking IOCB_NOWAIT as well. With that done, we can flag it with FMODE_NOWAIT as well. 
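A minimal sketch of the pattern used by the tun patch above and by this tap change may help readers unfamiliar with IOCB_NOWAIT. It is illustrative only: my_dev_open(), my_dev_read_iter() and my_dev_data_ready() are hypothetical names, not tun/tap functions; the real changes are in the diffs.

  #include <linux/fs.h>
  #include <linux/fcntl.h>
  #include <linux/uio.h>
  #include <linux/errno.h>

  static bool my_dev_data_ready(void *priv);      /* hypothetical helper */

  static int my_dev_open(struct inode *inode, struct file *file)
  {
          /* Advertise that the read/write iter handlers honour IOCB_NOWAIT. */
          file->f_mode |= FMODE_NOWAIT;
          return 0;
  }

  static ssize_t my_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
  {
          struct file *file = iocb->ki_filp;
          bool noblock = (file->f_flags & O_NONBLOCK) ||
                         (iocb->ki_flags & IOCB_NOWAIT);

          if (noblock && !my_dev_data_ready(file->private_data))
                  return -EAGAIN;         /* must not sleep in this case */

          /* ...copy data into 'to'; may sleep only when !noblock... */
          return 0;
  }

FMODE_NOWAIT only tells the VFS and io_uring that IOCB_NOWAIT is honoured; the handlers still have to check the flag themselves, which tun already did and which tap gains in the next patch.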
Signed-off-by: Jens Axboe Link: https://lore.kernel.org/r/8f859870-e6e2-09ca-9c0f-a2aa7c984fb2@kernel.dk Signed-off-by: Jakub Kicinski --- drivers/net/tap.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 8941aa199ea33..ce993cc75bf31 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -555,6 +555,9 @@ static int tap_open(struct inode *inode, struct file *file) goto err_put; } + /* tap groks IOCB_NOWAIT just fine, mark it as such */ + file->f_mode |= FMODE_NOWAIT; + dev_put(tap->dev); rtnl_unlock(); @@ -771,8 +774,12 @@ static ssize_t tap_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct tap_queue *q = file->private_data; + int noblock = 0; + + if ((file->f_flags & O_NONBLOCK) || (iocb->ki_flags & IOCB_NOWAIT)) + noblock = 1; - return tap_get_user(q, NULL, from, file->f_flags & O_NONBLOCK); + return tap_get_user(q, NULL, from, noblock); } /* Put packet to the user space buffer */ @@ -888,8 +895,12 @@ static ssize_t tap_read_iter(struct kiocb *iocb, struct iov_iter *to) struct file *file = iocb->ki_filp; struct tap_queue *q = file->private_data; ssize_t len = iov_iter_count(to), ret; + int noblock = 0; + + if ((file->f_flags & O_NONBLOCK) || (iocb->ki_flags & IOCB_NOWAIT)) + noblock = 1; - ret = tap_do_read(q, to, file->f_flags & O_NONBLOCK, NULL); + ret = tap_do_read(q, to, noblock, NULL); ret = min_t(ssize_t, ret, len); if (ret > 0) iocb->ki_pos = ret; -- GitLab From 566b6701d5dfc823d1e7ee27a7f0c5719f4b93dd Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 8 Mar 2023 15:17:19 +0200 Subject: [PATCH 0350/2078] skbuff: Replace open-coded skb_propagate_pfmemalloc()s Use skb_propagate_pfmemalloc() in build_skb()/build_skb_around() instead of open-coding it. Reviewed-by: Tariq Toukan Signed-off-by: Gal Pressman Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1a31815104d61..de465368fc2c2 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -422,8 +422,7 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) if (skb && frag_size) { skb->head_frag = 1; - if (page_is_pfmemalloc(virt_to_head_page(data))) - skb->pfmemalloc = 1; + skb_propagate_pfmemalloc(virt_to_head_page(data), skb); } return skb; } @@ -445,8 +444,7 @@ struct sk_buff *build_skb_around(struct sk_buff *skb, if (frag_size) { skb->head_frag = 1; - if (page_is_pfmemalloc(virt_to_head_page(data))) - skb->pfmemalloc = 1; + skb_propagate_pfmemalloc(virt_to_head_page(data), skb); } return skb; } -- GitLab From 3c6401266f91c69771176d3b289abfeaec731611 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 8 Mar 2023 15:17:20 +0200 Subject: [PATCH 0351/2078] skbuff: Add likely to skb pointer in build_skb() Similarly to napi_build_skb(), it is likely the skb allocation in build_skb() succeeded. frag_size != 0 is also likely, as stated in __build_skb_around(). 
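As a short aside on the annotation itself, likely()/unlikely() are pure branch-layout hints and never change behaviour. A self-contained sketch of the idiom (parse(), handle_common() and handle_slow() are made-up names, not kernel APIs):

  #include <linux/compiler.h>
  #include <linux/types.h>
  #include <linux/errno.h>

  static int handle_common(const u8 *buf, size_t len);   /* hypothetical */
  static int handle_slow(const u8 *buf, size_t len);     /* hypothetical */

  static int parse(const u8 *buf, size_t len)
  {
          if (unlikely(!buf || !len))             /* rare error path */
                  return -EINVAL;

          if (likely(buf[0] == 0x01))             /* expected common case */
                  return handle_common(buf, len);

          return handle_slow(buf, len);
  }

The patch below applies the same reasoning to build_skb(): a failed allocation or a zero frag_size is the exception, so the combined test is wrapped in likely().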
Reviewed-by: Tariq Toukan Signed-off-by: Gal Pressman Reviewed-by: Larysa Zaremba Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index de465368fc2c2..050a875d09c55 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -420,7 +420,7 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) { struct sk_buff *skb = __build_skb(data, frag_size); - if (skb && frag_size) { + if (likely(skb && frag_size)) { skb->head_frag = 1; skb_propagate_pfmemalloc(virt_to_head_page(data), skb); } -- GitLab From f94b9bed12e8244717512941494fe83c94773a58 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 9 Mar 2023 15:57:11 +0000 Subject: [PATCH 0352/2078] net: sfp: add A2h presence flag The hwmon code wants to know when it is safe to access the A2h data stored in a separate address. We indicate that this is present when we have SFF-8472 compliance and the lack of an address-change sequence., The same conditions are also true if we want to access other controls and status in the A2h address. So let's make a flag to indicate whether we can access it, instead of repeating the conditions throughout the code. For now, only convert the hwmon code. Signed-off-by: Russell King (Oracle) Reviewed-by: Andrew Lunn Signed-off-by: Jakub Kicinski --- drivers/net/phy/sfp.c | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index c02cad6478a81..4ff07b5a5590e 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -255,6 +255,8 @@ struct sfp { unsigned int module_power_mW; unsigned int module_t_start_up; unsigned int module_t_wait; + + bool have_a2; bool tx_fault_ignore; const struct sfp_quirk *quirk; @@ -1453,20 +1455,10 @@ static void sfp_hwmon_probe(struct work_struct *work) static int sfp_hwmon_insert(struct sfp *sfp) { - if (sfp->id.ext.sff8472_compliance == SFP_SFF8472_COMPLIANCE_NONE) - return 0; - - if (!(sfp->id.ext.diagmon & SFP_DIAGMON_DDM)) - return 0; - - if (sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE) - /* This driver in general does not support address - * change. 
- */ - return 0; - - mod_delayed_work(system_wq, &sfp->hwmon_probe, 1); - sfp->hwmon_tries = R_PROBE_RETRY_SLOW; + if (sfp->have_a2 && sfp->id.ext.diagmon & SFP_DIAGMON_DDM) { + mod_delayed_work(system_wq, &sfp->hwmon_probe, 1); + sfp->hwmon_tries = R_PROBE_RETRY_SLOW; + } return 0; } @@ -1916,6 +1908,18 @@ static int sfp_cotsworks_fixup_check(struct sfp *sfp, struct sfp_eeprom_id *id) return 0; } +static int sfp_module_parse_sff8472(struct sfp *sfp) +{ + /* If the module requires address swap mode, warn about it */ + if (sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE) + dev_warn(sfp->dev, + "module address swap to access page 0xA2 is not supported.\n"); + else + sfp->have_a2 = true; + + return 0; +} + static int sfp_sm_mod_probe(struct sfp *sfp, bool report) { /* SFP module inserted - read I2C data */ @@ -2053,10 +2057,11 @@ static int sfp_sm_mod_probe(struct sfp *sfp, bool report) return -EINVAL; } - /* If the module requires address swap mode, warn about it */ - if (sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE) - dev_warn(sfp->dev, - "module address swap to access page 0xA2 is not supported.\n"); + if (sfp->id.ext.sff8472_compliance != SFP_SFF8472_COMPLIANCE_NONE) { + ret = sfp_module_parse_sff8472(sfp); + if (ret < 0) + return ret; + } /* Parse the module power requirement */ ret = sfp_module_parse_power(sfp); @@ -2103,6 +2108,7 @@ static void sfp_sm_mod_remove(struct sfp *sfp) memset(&sfp->id, 0, sizeof(sfp->id)); sfp->module_power_mW = 0; + sfp->have_a2 = false; dev_info(sfp->dev, "module removed\n"); } -- GitLab From 5daed426f012a1c0db0048339e359ee98a2c8752 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 9 Mar 2023 15:57:16 +0000 Subject: [PATCH 0353/2078] net: sfp: only use soft polling if we have A2h access The soft state bits are stored in the A2h memory space, and require SFF-8472 compliance. This is what our have_a2 flag tells us, so use this to indicate whether we should attempt to use the soft signals. Signed-off-by: Russell King (Oracle) Reviewed-by: Andrew Lunn Signed-off-by: Jakub Kicinski --- drivers/net/phy/sfp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 4ff07b5a5590e..39e3095796d00 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -2284,7 +2284,11 @@ static void sfp_sm_main(struct sfp *sfp, unsigned int event) sfp->sm_dev_state != SFP_DEV_UP) break; - if (!(sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE)) + /* Only use the soft state bits if we have access to the A2h + * memory, which implies that we have some level of SFF-8472 + * compliance. + */ + if (sfp->have_a2) sfp_soft_start_poll(sfp); sfp_module_tx_enable(sfp); -- GitLab From 939a3f2a76e3216253b0a596b67ce9bd9ff6af51 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Thu, 9 Mar 2023 02:54:21 -0800 Subject: [PATCH 0354/2078] ptp_ocp: add force_irq to xilinx_spi configuration Flashing firmware via devlink flash was failing on PTP OCP devices because it is using Quad SPI mode, but the driver was not properly behaving. With force_irq flag landed it now can be fixed. 
Signed-off-by: Vadim Fedorenko Link: https://lore.kernel.org/r/20230309105421.2953451-1-vadfed@meta.com Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_ocp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index 4bbaccd543ad6..2b63f34876452 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -662,6 +662,7 @@ static struct ocp_resource ocp_fb_resource[] = { .num_chipselect = 1, .bits_per_word = 8, .num_devices = 1, + .force_irq = true, .devices = &(struct spi_board_info) { .modalias = "spi-nor", }, -- GitLab From aacaf7b3d19daaa91528ab0c598b89a7f82aa47d Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli Date: Thu, 9 Mar 2023 13:06:11 +0530 Subject: [PATCH 0355/2078] dt-bindings: net: ti: k3-am654-cpsw-nuss: Document Serdes PHY Update bindings to include Serdes PHY as an optional PHY, in addition to the existing CPSW MAC's PHY. The CPSW MAC's PHY is required while the Serdes PHY is optional. The Serdes PHY handle has to be provided only when the Serdes is being configured in a Single-Link protocol. Using the name "serdes-phy" to represent the Serdes PHY handle, the am65-cpsw-nuss driver can obtain the Serdes PHY and request the Serdes to be configured. Signed-off-by: Siddharth Vadapalli Reviewed-by: Krzysztof Kozlowski Signed-off-by: Jakub Kicinski --- .../bindings/net/ti,k3-am654-cpsw-nuss.yaml | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml index 900063411a201..628d63e1eb1f0 100644 --- a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml +++ b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml @@ -126,8 +126,18 @@ properties: description: CPSW port number phys: - maxItems: 1 - description: phandle on phy-gmii-sel PHY + minItems: 1 + items: + - description: CPSW MAC's PHY. + - description: Serdes PHY. Serdes PHY is required only if + the Serdes has to be configured in the + Single-Link configuration. + + phy-names: + minItems: 1 + items: + - const: mac + - const: serdes label: description: label associated with this port -- GitLab From bca93b20c3976ff5ca9fe373cf7acd0277492ae8 Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli Date: Thu, 9 Mar 2023 13:06:12 +0530 Subject: [PATCH 0356/2078] net: ethernet: ti: am65-cpsw: Update name of Serdes PHY The bindings for the am65-cpsw driver use the name "serdes" to refer to the Serdes PHY. Thus, update the name used for the Serdes PHY within the am65_cpsw_init_serdes_phy() function from "serdes-phy" to "serdes". 
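For context, a named PHY like this is looked up through the generic PHY framework with a con_id that must match the "phy-names" entry from the binding. A hedged sketch of the lookup (init_port_serdes() is an invented wrapper; the driver's real code is am65_cpsw_init_serdes_phy() in the diff below):

  #include <linux/device.h>
  #include <linux/err.h>
  #include <linux/of.h>
  #include <linux/phy/phy.h>

  static int init_port_serdes(struct device *dev, struct device_node *port_np,
                              struct phy **serdes_out)
  {
          struct phy *phy;
          int ret;

          /* "serdes" must match the phy-names entry in the DT binding */
          phy = devm_of_phy_get(dev, port_np, "serdes");
          if (IS_ERR(phy))
                  return PTR_ERR(phy);

          ret = phy_init(phy);
          if (ret)
                  return ret;

          *serdes_out = phy;
          return 0;
  }

Keeping the con_id identical to the binding name ("serdes" rather than "serdes-phy") is exactly what the one-line change below restores.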
Signed-off-by: Siddharth Vadapalli Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 25996826edabc..4cfbc1c2b1c43 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -1466,7 +1466,7 @@ static void am65_cpsw_disable_serdes_phy(struct am65_cpsw_common *common) static int am65_cpsw_init_serdes_phy(struct device *dev, struct device_node *port_np, struct am65_cpsw_port *port) { - const char *name = "serdes-phy"; + const char *name = "serdes"; struct phy *phy; int ret; -- GitLab From 05ccd8d8a15e6b9c99b86cf8a2fd78e3f0c60a84 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 9 Mar 2023 11:59:04 +0000 Subject: [PATCH 0357/2078] sfc: support offloading TC VLAN push/pop actions to the MAE EF100 can pop and/or push up to two VLAN tags. Signed-off-by: Edward Cree Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230309115904.56442-1-edward.cree@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/mae.c | 16 +++++++++++++ drivers/net/ethernet/sfc/mcdi.h | 5 ++++ drivers/net/ethernet/sfc/tc.c | 42 +++++++++++++++++++++++++++++++++ drivers/net/ethernet/sfc/tc.h | 4 ++++ 4 files changed, 67 insertions(+) diff --git a/drivers/net/ethernet/sfc/mae.c b/drivers/net/ethernet/sfc/mae.c index 2d32abe5f4781..c53d354c1fb23 100644 --- a/drivers/net/ethernet/sfc/mae.c +++ b/drivers/net/ethernet/sfc/mae.c @@ -682,6 +682,10 @@ int efx_mae_alloc_action_set(struct efx_nic *efx, struct efx_tc_action_set *act) size_t outlen; int rc; + MCDI_POPULATE_DWORD_2(inbuf, MAE_ACTION_SET_ALLOC_IN_FLAGS, + MAE_ACTION_SET_ALLOC_IN_VLAN_PUSH, act->vlan_push, + MAE_ACTION_SET_ALLOC_IN_VLAN_POP, act->vlan_pop); + MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_SRC_MAC_ID, MC_CMD_MAE_MAC_ADDR_ALLOC_OUT_MAC_ID_NULL); MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_DST_MAC_ID, @@ -694,6 +698,18 @@ int efx_mae_alloc_action_set(struct efx_nic *efx, struct efx_tc_action_set *act) MC_CMD_MAE_COUNTER_ALLOC_OUT_COUNTER_ID_NULL); MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_COUNTER_LIST_ID, MC_CMD_MAE_COUNTER_LIST_ALLOC_OUT_COUNTER_LIST_ID_NULL); + if (act->vlan_push) { + MCDI_SET_WORD_BE(inbuf, MAE_ACTION_SET_ALLOC_IN_VLAN0_TCI_BE, + act->vlan_tci[0]); + MCDI_SET_WORD_BE(inbuf, MAE_ACTION_SET_ALLOC_IN_VLAN0_PROTO_BE, + act->vlan_proto[0]); + } + if (act->vlan_push >= 2) { + MCDI_SET_WORD_BE(inbuf, MAE_ACTION_SET_ALLOC_IN_VLAN1_TCI_BE, + act->vlan_tci[1]); + MCDI_SET_WORD_BE(inbuf, MAE_ACTION_SET_ALLOC_IN_VLAN1_PROTO_BE, + act->vlan_proto[1]); + } MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_ENCAP_HEADER_ID, MC_CMD_MAE_ENCAP_HEADER_ALLOC_OUT_ENCAP_HEADER_ID_NULL); if (act->deliver) diff --git a/drivers/net/ethernet/sfc/mcdi.h b/drivers/net/ethernet/sfc/mcdi.h index b139b76febff1..454e9d51a4c2c 100644 --- a/drivers/net/ethernet/sfc/mcdi.h +++ b/drivers/net/ethernet/sfc/mcdi.h @@ -233,6 +233,11 @@ void efx_mcdi_sensor_event(struct efx_nic *efx, efx_qword_t *ev); ((void)BUILD_BUG_ON_ZERO(_field ## _LEN != 2), \ le16_to_cpu(*(__force const __le16 *)MCDI_STRUCT_PTR(_buf, _field))) /* Write a 16-bit field defined in the protocol as being big-endian. 
*/ +#define MCDI_SET_WORD_BE(_buf, _field, _value) do { \ + BUILD_BUG_ON(MC_CMD_ ## _field ## _LEN != 2); \ + BUILD_BUG_ON(MC_CMD_ ## _field ## _OFST & 1); \ + *(__force __be16 *)MCDI_PTR(_buf, _field) = (_value); \ + } while (0) #define MCDI_STRUCT_SET_WORD_BE(_buf, _field, _value) do { \ BUILD_BUG_ON(_field ## _LEN != 2); \ BUILD_BUG_ON(_field ## _OFST & 1); \ diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c index deeaab9ee761d..2b07bb2fd735f 100644 --- a/drivers/net/ethernet/sfc/tc.c +++ b/drivers/net/ethernet/sfc/tc.c @@ -286,6 +286,8 @@ static int efx_tc_flower_parse_match(struct efx_nic *efx, /* For details of action order constraints refer to SF-123102-TC-1§12.6.1 */ enum efx_tc_action_order { + EFX_TC_AO_VLAN_POP, + EFX_TC_AO_VLAN_PUSH, EFX_TC_AO_COUNT, EFX_TC_AO_DELIVER }; @@ -294,6 +296,20 @@ static bool efx_tc_flower_action_order_ok(const struct efx_tc_action_set *act, enum efx_tc_action_order new) { switch (new) { + case EFX_TC_AO_VLAN_POP: + if (act->vlan_pop >= 2) + return false; + /* If we've already pushed a VLAN, we can't then pop it; + * the hardware would instead try to pop an existing VLAN + * before pushing the new one. + */ + if (act->vlan_push) + return false; + fallthrough; + case EFX_TC_AO_VLAN_PUSH: + if (act->vlan_push >= 2) + return false; + fallthrough; case EFX_TC_AO_COUNT: if (act->count) return false; @@ -393,6 +409,7 @@ static int efx_tc_flower_replace(struct efx_nic *efx, flow_action_for_each(i, fa, &fr->action) { struct efx_tc_action_set save; + u16 tci; if (!act) { /* more actions after a non-pipe action */ @@ -494,6 +511,31 @@ static int efx_tc_flower_replace(struct efx_nic *efx, } *act = save; break; + case FLOW_ACTION_VLAN_POP: + if (act->vlan_push) { + act->vlan_push--; + } else if (efx_tc_flower_action_order_ok(act, EFX_TC_AO_VLAN_POP)) { + act->vlan_pop++; + } else { + NL_SET_ERR_MSG_MOD(extack, + "More than two VLAN pops, or action order violated"); + rc = -EINVAL; + goto release; + } + break; + case FLOW_ACTION_VLAN_PUSH: + if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_VLAN_PUSH)) { + rc = -EINVAL; + NL_SET_ERR_MSG_MOD(extack, + "More than two VLAN pushes, or action order violated"); + goto release; + } + tci = fa->vlan.vid & VLAN_VID_MASK; + tci |= fa->vlan.prio << VLAN_PRIO_SHIFT; + act->vlan_tci[act->vlan_push] = cpu_to_be16(tci); + act->vlan_proto[act->vlan_push] = fa->vlan.proto; + act->vlan_push++; + break; default: NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u", fa->id); diff --git a/drivers/net/ethernet/sfc/tc.h b/drivers/net/ethernet/sfc/tc.h index 418ce8c13a06d..542853f60c2a5 100644 --- a/drivers/net/ethernet/sfc/tc.h +++ b/drivers/net/ethernet/sfc/tc.h @@ -19,7 +19,11 @@ #define IS_ALL_ONES(v) (!(typeof (v))~(v)) struct efx_tc_action_set { + u16 vlan_push:2; + u16 vlan_pop:2; u16 deliver:1; + __be16 vlan_tci[2]; /* TCIs for vlan_push */ + __be16 vlan_proto[2]; /* Ethertypes for vlan_push */ struct efx_tc_counter_index *count; u32 dest_mport; u32 fw_id; /* index of this entry in firmware actions table */ -- GitLab From 95b744508d4d5135ae2a096ff3f0ee882bcc52b3 Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Fri, 10 Mar 2023 03:52:05 +0500 Subject: [PATCH 0358/2078] qede: remove linux/version.h and linux/compiler.h make versioncheck reports the following: ./drivers/net/ethernet/qlogic/qede/qede.h: 10 linux/version.h not needed. ./drivers/net/ethernet/qlogic/qede/qede_ethtool.c: 7 linux/version.h not needed. So remove linux/version.h from both of these files. 
Also remove linux/compiler.h while at it as it is also not being used. Signed-off-by: Muhammad Usama Anjum Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230309225206.2473644-1-usama.anjum@collabora.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qede/qede.h | 2 -- drivers/net/ethernet/qlogic/qede/qede_ethtool.c | 1 - 2 files changed, 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index f90dcfe9ee688..f9931ecb7baa2 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -6,8 +6,6 @@ #ifndef _QEDE_H_ #define _QEDE_H_ -#include -#include #include #include #include diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 8034d812d5a00..374a86b875a3e 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -4,7 +4,6 @@ * Copyright (c) 2019-2020 Marvell International Ltd. */ -#include #include #include #include -- GitLab From 5f24f41e8ea62a6a9095f9bbafb8b3aebe265c68 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 10 Mar 2023 17:26:05 +0800 Subject: [PATCH 0359/2078] xfrm: Remove inner/outer modes from input path The inner/outer modes were added to abstract out common code that were once duplicated between IPv4 and IPv6. As time went on the abstractions have been removed and we are now left with empty shells that only contain duplicate information. These can be removed one-by-one as the same information is already present elsewhere in the xfrm_state object. Removing them from the input path actually allows certain valid combinations that are currently disallowed. In particular, when a transport mode SA sits beneath a tunnel mode SA that changes address families, at present the transport mode SA cannot have AF_UNSPEC as its selector because it will be erroneously be treated as inter-family itself even though it simply sits beneath one. This is a serious problem because you can't set the selector to non-AF_UNSPEC either as that will cause the selector match to fail as we always match selectors to the inner-most traffic. 
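The observation behind this series can be reduced to a small sketch: the inner address family is always recoverable from the tunnelled protocol number carried in the packet, so no per-state "inner mode" needs to be cached for it. This is illustrative only and not the actual xfrm code:

  #include <linux/types.h>
  #include <linux/in.h>
  #include <linux/socket.h>

  static int inner_family_from_proto(u8 proto)
  {
          switch (proto) {
          case IPPROTO_IPIP:      /* IPv4 payload */
          case IPPROTO_BEETPH:    /* BEET pseudo-header, IPv4 payload */
                  return AF_INET;
          case IPPROTO_IPV6:      /* IPv6 payload */
                  return AF_INET6;
          default:
                  return AF_UNSPEC;
          }
  }

With the family derived this way, a transport-mode SA sitting below an inter-family tunnel no longer needs a non-AF_UNSPEC selector just to keep the mode bookkeeping happy, which is the restriction the patch lifts.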
Signed-off-by: Herbert Xu Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_input.c | 66 +++++++++++++++---------------------------- 1 file changed, 23 insertions(+), 43 deletions(-) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 436d29640ac2c..39fb91ff23d96 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -231,9 +231,6 @@ static int xfrm4_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb) { int err = -EINVAL; - if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP) - goto out; - if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto out; @@ -269,8 +266,6 @@ static int xfrm6_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb) { int err = -EINVAL; - if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6) - goto out; if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto out; @@ -331,22 +326,26 @@ out: */ static int xfrm_inner_mode_encap_remove(struct xfrm_state *x, - const struct xfrm_mode *inner_mode, struct sk_buff *skb) { - switch (inner_mode->encap) { + switch (x->props.mode) { case XFRM_MODE_BEET: - if (inner_mode->family == AF_INET) + switch (XFRM_MODE_SKB_CB(skb)->protocol) { + case IPPROTO_IPIP: + case IPPROTO_BEETPH: return xfrm4_remove_beet_encap(x, skb); - if (inner_mode->family == AF_INET6) + case IPPROTO_IPV6: return xfrm6_remove_beet_encap(x, skb); + } break; case XFRM_MODE_TUNNEL: - if (inner_mode->family == AF_INET) + switch (XFRM_MODE_SKB_CB(skb)->protocol) { + case IPPROTO_IPIP: return xfrm4_remove_tunnel_encap(x, skb); - if (inner_mode->family == AF_INET6) + case IPPROTO_IPV6: return xfrm6_remove_tunnel_encap(x, skb); break; + } } WARN_ON_ONCE(1); @@ -355,9 +354,7 @@ xfrm_inner_mode_encap_remove(struct xfrm_state *x, static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) { - const struct xfrm_mode *inner_mode = &x->inner_mode; - - switch (x->outer_mode.family) { + switch (x->props.family) { case AF_INET: xfrm4_extract_header(skb); break; @@ -369,17 +366,12 @@ static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) return -EAFNOSUPPORT; } - if (x->sel.family == AF_UNSPEC) { - inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); - if (!inner_mode) - return -EAFNOSUPPORT; - } - - switch (inner_mode->family) { - case AF_INET: + switch (XFRM_MODE_SKB_CB(skb)->protocol) { + case IPPROTO_IPIP: + case IPPROTO_BEETPH: skb->protocol = htons(ETH_P_IP); break; - case AF_INET6: + case IPPROTO_IPV6: skb->protocol = htons(ETH_P_IPV6); break; default: @@ -387,7 +379,7 @@ static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) break; } - return xfrm_inner_mode_encap_remove(x, inner_mode, skb); + return xfrm_inner_mode_encap_remove(x, skb); } /* Remove encapsulation header. 
@@ -433,17 +425,16 @@ static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) } static int xfrm_inner_mode_input(struct xfrm_state *x, - const struct xfrm_mode *inner_mode, struct sk_buff *skb) { - switch (inner_mode->encap) { + switch (x->props.mode) { case XFRM_MODE_BEET: case XFRM_MODE_TUNNEL: return xfrm_prepare_input(x, skb); case XFRM_MODE_TRANSPORT: - if (inner_mode->family == AF_INET) + if (x->props.family == AF_INET) return xfrm4_transport_input(x, skb); - if (inner_mode->family == AF_INET6) + if (x->props.family == AF_INET6) return xfrm6_transport_input(x, skb); break; case XFRM_MODE_ROUTEOPTIMIZATION: @@ -461,7 +452,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) { const struct xfrm_state_afinfo *afinfo; struct net *net = dev_net(skb->dev); - const struct xfrm_mode *inner_mode; int err; __be32 seq; __be32 seq_hi; @@ -491,7 +481,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) goto drop; } - family = x->outer_mode.family; + family = x->props.family; /* An encap_type of -1 indicates async resumption. */ if (encap_type == -1) { @@ -676,17 +666,7 @@ resume: XFRM_MODE_SKB_CB(skb)->protocol = nexthdr; - inner_mode = &x->inner_mode; - - if (x->sel.family == AF_UNSPEC) { - inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); - if (inner_mode == NULL) { - XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); - goto drop; - } - } - - if (xfrm_inner_mode_input(x, inner_mode, skb)) { + if (xfrm_inner_mode_input(x, skb)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); goto drop; } @@ -701,7 +681,7 @@ resume: * transport mode so the outer address is identical. */ daddr = &x->id.daddr; - family = x->outer_mode.family; + family = x->props.family; err = xfrm_parse_spi(skb, nexthdr, &spi, &seq); if (err < 0) { @@ -732,7 +712,7 @@ resume: err = -EAFNOSUPPORT; rcu_read_lock(); - afinfo = xfrm_state_afinfo_get_rcu(x->inner_mode.family); + afinfo = xfrm_state_afinfo_get_rcu(x->props.family); if (likely(afinfo)) err = afinfo->transport_finish(skb, xfrm_gro || async); rcu_read_unlock(); -- GitLab From f4796398f21b9844017a2dac883b1dd6ad6edd60 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 10 Mar 2023 17:40:32 +0800 Subject: [PATCH 0360/2078] xfrm: Remove inner/outer modes from output path The inner/outer modes were added to abstract out common code that were once duplicated between IPv4 and IPv6. As time went on the abstractions have been removed and we are now left with empty shells that only contain duplicate information. These can be removed one-by-one as the same information is already present elsewhere in the xfrm_state object. Just like the input-side, removing this from the output code makes it possible to use transport-mode SAs underneath an inter-family tunnel mode SA. 
Signed-off-by: Herbert Xu Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_output.c | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index ff114d68cc439..369e5de8558ff 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -412,7 +412,7 @@ static int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb) IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE; skb->protocol = htons(ETH_P_IP); - switch (x->outer_mode.encap) { + switch (x->props.mode) { case XFRM_MODE_BEET: return xfrm4_beet_encap_add(x, skb); case XFRM_MODE_TUNNEL: @@ -435,7 +435,7 @@ static int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) skb->ignore_df = 1; skb->protocol = htons(ETH_P_IPV6); - switch (x->outer_mode.encap) { + switch (x->props.mode) { case XFRM_MODE_BEET: return xfrm6_beet_encap_add(x, skb); case XFRM_MODE_TUNNEL: @@ -451,22 +451,22 @@ static int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb) { - switch (x->outer_mode.encap) { + switch (x->props.mode) { case XFRM_MODE_BEET: case XFRM_MODE_TUNNEL: - if (x->outer_mode.family == AF_INET) + if (x->props.family == AF_INET) return xfrm4_prepare_output(x, skb); - if (x->outer_mode.family == AF_INET6) + if (x->props.family == AF_INET6) return xfrm6_prepare_output(x, skb); break; case XFRM_MODE_TRANSPORT: - if (x->outer_mode.family == AF_INET) + if (x->props.family == AF_INET) return xfrm4_transport_output(x, skb); - if (x->outer_mode.family == AF_INET6) + if (x->props.family == AF_INET6) return xfrm6_transport_output(x, skb); break; case XFRM_MODE_ROUTEOPTIMIZATION: - if (x->outer_mode.family == AF_INET6) + if (x->props.family == AF_INET6) return xfrm6_ro_output(x, skb); WARN_ON_ONCE(1); break; @@ -875,21 +875,10 @@ static int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) { - const struct xfrm_mode *inner_mode; - - if (x->sel.family == AF_UNSPEC) - inner_mode = xfrm_ip2inner_mode(x, - xfrm_af2proto(skb_dst(skb)->ops->family)); - else - inner_mode = &x->inner_mode; - - if (inner_mode == NULL) - return -EAFNOSUPPORT; - - switch (inner_mode->family) { - case AF_INET: + switch (skb->protocol) { + case htons(ETH_P_IP): return xfrm4_extract_output(x, skb); - case AF_INET6: + case htons(ETH_P_IPV6): return xfrm6_extract_output(x, skb); } -- GitLab From 25e289e1f52e1f4fb1d07622c6a24f8d8a8e420d Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Wed, 1 Mar 2023 16:20:58 +0200 Subject: [PATCH 0361/2078] wifi: ath11k: use proper regulatory reference for bands Currently, during regulatory event, 2 GHz/5 GHz is referred to as 2G/5G including variable names. However, there is no such entity as 2G or 5G. Re-name such occurences to its proper name. No functional changes. 
Tested-on: IPQ8074 hw2.0 AHB WLAN.HK.2.7.0.1-01744-QCAHKSWPL_SILICONZ-1 Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.7.0.1-01744-QCAHKSWPL_SILICONZ-1 Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.5.0.1-01100-QCAHKSWPL_SILICONZ-1 Signed-off-by: Aditya Kumar Singh Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230110121024.14051-2-quic_adisi@quicinc.com --- drivers/net/wireless/ath/ath11k/reg.c | 20 ++++----- drivers/net/wireless/ath/ath11k/wmi.c | 58 ++++++++++++++------------- drivers/net/wireless/ath/ath11k/wmi.h | 28 ++++++------- 3 files changed, 54 insertions(+), 52 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/reg.c b/drivers/net/wireless/ath/ath11k/reg.c index 6fae4e61ede7f..c231abdf49cb0 100644 --- a/drivers/net/wireless/ath/ath11k/reg.c +++ b/drivers/net/wireless/ath/ath11k/reg.c @@ -619,7 +619,7 @@ ath11k_reg_build_regd(struct ath11k_base *ab, u32 flags; char alpha2[3]; - num_rules = reg_info->num_5g_reg_rules + reg_info->num_2g_reg_rules; + num_rules = reg_info->num_5ghz_reg_rules + reg_info->num_2ghz_reg_rules; if (!num_rules) goto ret; @@ -644,20 +644,20 @@ ath11k_reg_build_regd(struct ath11k_base *ab, alpha2, ath11k_reg_get_regdom_str(tmp_regd->dfs_region), reg_info->dfs_region, num_rules); /* Update reg_rules[] below. Firmware is expected to - * send these rules in order(2G rules first and then 5G) + * send these rules in order(2 GHz rules first and then 5 GHz) */ for (; i < num_rules; i++) { - if (reg_info->num_2g_reg_rules && - (i < reg_info->num_2g_reg_rules)) { - reg_rule = reg_info->reg_rules_2g_ptr + i; + if (reg_info->num_2ghz_reg_rules && + (i < reg_info->num_2ghz_reg_rules)) { + reg_rule = reg_info->reg_rules_2ghz_ptr + i; max_bw = min_t(u16, reg_rule->max_bw, - reg_info->max_bw_2g); + reg_info->max_bw_2ghz); flags = 0; - } else if (reg_info->num_5g_reg_rules && - (j < reg_info->num_5g_reg_rules)) { - reg_rule = reg_info->reg_rules_5g_ptr + j++; + } else if (reg_info->num_5ghz_reg_rules && + (j < reg_info->num_5ghz_reg_rules)) { + reg_rule = reg_info->reg_rules_5ghz_ptr + j++; max_bw = min_t(u16, reg_rule->max_bw, - reg_info->max_bw_5g); + reg_info->max_bw_5ghz); /* FW doesn't pass NL80211_RRF_AUTO_BW flag for * BW Auto correction, we can enable this by default diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c index 815f49c3b3adc..ecd51d285103f 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -4959,7 +4959,7 @@ static int ath11k_pull_reg_chan_list_update_ev(struct ath11k_base *ab, const void **tb; const struct wmi_reg_chan_list_cc_event *chan_list_event_hdr; struct wmi_regulatory_rule_struct *wmi_reg_rule; - u32 num_2g_reg_rules, num_5g_reg_rules; + u32 num_2ghz_reg_rules, num_5ghz_reg_rules; int ret; ath11k_dbg(ab, ATH11K_DBG_WMI, "processing regulatory channel list\n"); @@ -4978,10 +4978,10 @@ static int ath11k_pull_reg_chan_list_update_ev(struct ath11k_base *ab, return -EPROTO; } - reg_info->num_2g_reg_rules = chan_list_event_hdr->num_2g_reg_rules; - reg_info->num_5g_reg_rules = chan_list_event_hdr->num_5g_reg_rules; + reg_info->num_2ghz_reg_rules = chan_list_event_hdr->num_2ghz_reg_rules; + reg_info->num_5ghz_reg_rules = chan_list_event_hdr->num_5ghz_reg_rules; - if (!(reg_info->num_2g_reg_rules + reg_info->num_5g_reg_rules)) { + if (!(reg_info->num_2ghz_reg_rules + reg_info->num_5ghz_reg_rules)) { ath11k_warn(ab, "No regulatory rules available in the event info\n"); kfree(tb); return -EINVAL; @@ -5008,46 +5008,48 @@ static int 
ath11k_pull_reg_chan_list_update_ev(struct ath11k_base *ab, else if (chan_list_event_hdr->status_code == WMI_REG_SET_CC_STATUS_FAIL) reg_info->status_code = REG_SET_CC_STATUS_FAIL; - reg_info->min_bw_2g = chan_list_event_hdr->min_bw_2g; - reg_info->max_bw_2g = chan_list_event_hdr->max_bw_2g; - reg_info->min_bw_5g = chan_list_event_hdr->min_bw_5g; - reg_info->max_bw_5g = chan_list_event_hdr->max_bw_5g; + reg_info->min_bw_2ghz = chan_list_event_hdr->min_bw_2ghz; + reg_info->max_bw_2ghz = chan_list_event_hdr->max_bw_2ghz; + reg_info->min_bw_5ghz = chan_list_event_hdr->min_bw_5ghz; + reg_info->max_bw_5ghz = chan_list_event_hdr->max_bw_5ghz; - num_2g_reg_rules = reg_info->num_2g_reg_rules; - num_5g_reg_rules = reg_info->num_5g_reg_rules; + num_2ghz_reg_rules = reg_info->num_2ghz_reg_rules; + num_5ghz_reg_rules = reg_info->num_5ghz_reg_rules; ath11k_dbg(ab, ATH11K_DBG_WMI, - "%s:cc %s dsf %d BW: min_2g %d max_2g %d min_5g %d max_5g %d", + "%s:cc %s dsf %d BW: min_2ghz %d max_2ghz %d min_5ghz %d max_5ghz %d", __func__, reg_info->alpha2, reg_info->dfs_region, - reg_info->min_bw_2g, reg_info->max_bw_2g, - reg_info->min_bw_5g, reg_info->max_bw_5g); + reg_info->min_bw_2ghz, reg_info->max_bw_2ghz, + reg_info->min_bw_5ghz, reg_info->max_bw_5ghz); ath11k_dbg(ab, ATH11K_DBG_WMI, - "%s: num_2g_reg_rules %d num_5g_reg_rules %d", __func__, - num_2g_reg_rules, num_5g_reg_rules); + "%s: num_2ghz_reg_rules %d num_5ghz_reg_rules %d", __func__, + num_2ghz_reg_rules, num_5ghz_reg_rules); wmi_reg_rule = (struct wmi_regulatory_rule_struct *)((u8 *)chan_list_event_hdr + sizeof(*chan_list_event_hdr) + sizeof(struct wmi_tlv)); - if (num_2g_reg_rules) { - reg_info->reg_rules_2g_ptr = create_reg_rules_from_wmi(num_2g_reg_rules, - wmi_reg_rule); - if (!reg_info->reg_rules_2g_ptr) { + if (num_2ghz_reg_rules) { + reg_info->reg_rules_2ghz_ptr = + create_reg_rules_from_wmi(num_2ghz_reg_rules, + wmi_reg_rule); + if (!reg_info->reg_rules_2ghz_ptr) { kfree(tb); - ath11k_warn(ab, "Unable to Allocate memory for 2g rules\n"); + ath11k_warn(ab, "Unable to Allocate memory for 2 GHz rules\n"); return -ENOMEM; } } - if (num_5g_reg_rules) { - wmi_reg_rule += num_2g_reg_rules; - reg_info->reg_rules_5g_ptr = create_reg_rules_from_wmi(num_5g_reg_rules, - wmi_reg_rule); - if (!reg_info->reg_rules_5g_ptr) { + if (num_5ghz_reg_rules) { + wmi_reg_rule += num_2ghz_reg_rules; + reg_info->reg_rules_5ghz_ptr = + create_reg_rules_from_wmi(num_5ghz_reg_rules, + wmi_reg_rule); + if (!reg_info->reg_rules_5ghz_ptr) { kfree(tb); - ath11k_warn(ab, "Unable to Allocate memory for 5g rules\n"); + ath11k_warn(ab, "Unable to Allocate memory for 5 GHz rules\n"); return -ENOMEM; } } @@ -6619,8 +6621,8 @@ fallback: WARN_ON(1); mem_free: if (reg_info) { - kfree(reg_info->reg_rules_2g_ptr); - kfree(reg_info->reg_rules_5g_ptr); + kfree(reg_info->reg_rules_2ghz_ptr); + kfree(reg_info->reg_rules_5ghz_ptr); kfree(reg_info); } return ret; diff --git a/drivers/net/wireless/ath/ath11k/wmi.h b/drivers/net/wireless/ath/ath11k/wmi.h index 63dbb085949ff..858403f190c2b 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.h +++ b/drivers/net/wireless/ath/ath11k/wmi.h @@ -4129,14 +4129,14 @@ struct cur_regulatory_info { u8 alpha2[REG_ALPHA2_LEN + 1]; u32 dfs_region; u32 phybitmap; - u32 min_bw_2g; - u32 max_bw_2g; - u32 min_bw_5g; - u32 max_bw_5g; - u32 num_2g_reg_rules; - u32 num_5g_reg_rules; - struct cur_reg_rule *reg_rules_2g_ptr; - struct cur_reg_rule *reg_rules_5g_ptr; + u32 min_bw_2ghz; + u32 max_bw_2ghz; + u32 min_bw_5ghz; + u32 max_bw_5ghz; + u32 num_2ghz_reg_rules; + u32 
num_5ghz_reg_rules; + struct cur_reg_rule *reg_rules_2ghz_ptr; + struct cur_reg_rule *reg_rules_5ghz_ptr; }; struct wmi_reg_chan_list_cc_event { @@ -4148,12 +4148,12 @@ struct wmi_reg_chan_list_cc_event { u32 domain_code; u32 dfs_region; u32 phybitmap; - u32 min_bw_2g; - u32 max_bw_2g; - u32 min_bw_5g; - u32 max_bw_5g; - u32 num_2g_reg_rules; - u32 num_5g_reg_rules; + u32 min_bw_2ghz; + u32 max_bw_2ghz; + u32 min_bw_5ghz; + u32 max_bw_5ghz; + u32 num_2ghz_reg_rules; + u32 num_5ghz_reg_rules; } __packed; struct wmi_regulatory_rule_struct { -- GitLab From 91fa00fa69224aae5afb720c5e68b22e4c4f7333 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Wed, 1 Mar 2023 16:20:59 +0200 Subject: [PATCH 0362/2078] wifi: ath11k: add support to parse new WMI event for 6 GHz In order to support different power levels of 6 GHz AP and client, new WMI event for regulatory - WMI_REG_CHAN_LIST_CC_EXT_EVENTID is added in firmware. This event provides new parameters required for 6 GHz regulatory rules. Add support for parsing 2.4 GHz, 5 GHz and 6 GHz reg rules and other parameters from WMI_REG_CHAN_LIST_CC_EXT_EVENTID. Tested-on: IPQ8074 hw2.0 AHB WLAN.HK.2.7.0.1-01744-QCAHKSWPL_SILICONZ-1 Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.7.0.1-01744-QCAHKSWPL_SILICONZ-1 Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.5.0.1-01100-QCAHKSWPL_SILICONZ-1 Signed-off-by: Lavanya Suresh Signed-off-by: Wen Gong Signed-off-by: Aditya Kumar Singh Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230110121024.14051-3-quic_adisi@quicinc.com --- drivers/net/wireless/ath/ath11k/reg.c | 37 ++- drivers/net/wireless/ath/ath11k/wmi.c | 418 +++++++++++++++++++++++++- drivers/net/wireless/ath/ath11k/wmi.h | 163 +++++++++- 3 files changed, 584 insertions(+), 34 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/reg.c b/drivers/net/wireless/ath/ath11k/reg.c index c231abdf49cb0..08621bd13a125 100644 --- a/drivers/net/wireless/ath/ath11k/reg.c +++ b/drivers/net/wireless/ath/ath11k/reg.c @@ -613,7 +613,7 @@ ath11k_reg_build_regd(struct ath11k_base *ab, { struct ieee80211_regdomain *tmp_regd, *default_regd, *new_regd = NULL; struct cur_reg_rule *reg_rule; - u8 i = 0, j = 0; + u8 i = 0, j = 0, k = 0; u8 num_rules; u16 max_bw; u32 flags; @@ -621,6 +621,12 @@ ath11k_reg_build_regd(struct ath11k_base *ab, num_rules = reg_info->num_5ghz_reg_rules + reg_info->num_2ghz_reg_rules; + /* FIXME: Currently taking reg rules for 6 GHz only from Indoor AP mode list. + * This can be updated after complete 6 GHz regulatory support is added. + */ + if (reg_info->is_ext_reg_event) + num_rules += reg_info->num_6ghz_rules_ap[WMI_REG_INDOOR_AP]; + if (!num_rules) goto ret; @@ -666,6 +672,14 @@ ath11k_reg_build_regd(struct ath11k_base *ab, * per other BW rule flags we pass from here */ flags = NL80211_RRF_AUTO_BW; + } else if (reg_info->is_ext_reg_event && + reg_info->num_6ghz_rules_ap[WMI_REG_INDOOR_AP] && + (k < reg_info->num_6ghz_rules_ap[WMI_REG_INDOOR_AP])) { + reg_rule = reg_info->reg_rules_6ghz_ap_ptr[WMI_REG_INDOOR_AP] + + k++; + max_bw = min_t(u16, reg_rule->max_bw, + reg_info->max_bw_6ghz_ap[WMI_REG_INDOOR_AP]); + flags = NL80211_RRF_AUTO_BW; } else { break; } @@ -693,12 +707,21 @@ ath11k_reg_build_regd(struct ath11k_base *ab, continue; } - ath11k_dbg(ab, ATH11K_DBG_REG, - "\t%d. (%d - %d @ %d) (%d, %d) (%d ms) (FLAGS %d)\n", - i + 1, reg_rule->start_freq, reg_rule->end_freq, - max_bw, reg_rule->ant_gain, reg_rule->reg_power, - tmp_regd->reg_rules[i].dfs_cac_ms, - flags); + if (reg_info->is_ext_reg_event) { + ath11k_dbg(ab, ATH11K_DBG_REG, + "\t%d. 
(%d - %d @ %d) (%d, %d) (%d ms) (FLAGS %d) (%d, %d)\n", + i + 1, reg_rule->start_freq, reg_rule->end_freq, + max_bw, reg_rule->ant_gain, reg_rule->reg_power, + tmp_regd->reg_rules[i].dfs_cac_ms, flags, + reg_rule->psd_flag, reg_rule->psd_eirp); + } else { + ath11k_dbg(ab, ATH11K_DBG_REG, + "\t%d. (%d - %d @ %d) (%d, %d) (%d ms) (FLAGS %d)\n", + i + 1, reg_rule->start_freq, reg_rule->end_freq, + max_bw, reg_rule->ant_gain, reg_rule->reg_power, + tmp_regd->reg_rules[i].dfs_cac_ms, + flags); + } } tmp_regd->n_reg_rules = i; diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c index ecd51d285103f..34731d0d1efca 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -105,6 +105,8 @@ static const struct wmi_tlv_policy wmi_tlv_policies[] = { = { .min_len = sizeof(struct wmi_vdev_stopped_event) }, [WMI_TAG_REG_CHAN_LIST_CC_EVENT] = { .min_len = sizeof(struct wmi_reg_chan_list_cc_event) }, + [WMI_TAG_REG_CHAN_LIST_CC_EXT_EVENT] + = { .min_len = sizeof(struct wmi_reg_chan_list_cc_ext_event) }, [WMI_TAG_MGMT_RX_HDR] = { .min_len = sizeof(struct wmi_mgmt_rx_hdr) }, [WMI_TAG_MGMT_TX_COMPL_EVENT] @@ -3974,6 +3976,10 @@ ath11k_wmi_copy_resource_config(struct wmi_resource_config *wmi_cfg, wmi_cfg->sched_params = tg_cfg->sched_params; wmi_cfg->twt_ap_pdev_count = tg_cfg->twt_ap_pdev_count; wmi_cfg->twt_ap_sta_count = tg_cfg->twt_ap_sta_count; + wmi_cfg->host_service_flags &= + ~(1 << WMI_CFG_HOST_SERVICE_FLAG_REG_CC_EXT); + wmi_cfg->host_service_flags |= (tg_cfg->is_reg_cc_ext_event_supported << + WMI_CFG_HOST_SERVICE_FLAG_REG_CC_EXT); } static int ath11k_init_cmd_send(struct ath11k_pdev_wmi *wmi, @@ -4192,6 +4198,10 @@ int ath11k_wmi_cmd_init(struct ath11k_base *ab) ab->hw_params.hw_ops->wmi_init_config(ab, &config); + if (test_bit(WMI_TLV_SERVICE_REG_CC_EXT_EVENT_SUPPORT, + ab->wmi_ab.svc_map)) + config.is_reg_cc_ext_event_supported = 1; + memcpy(&wmi_sc->wlan_resource_config, &config, sizeof(config)); init_param.res_cfg = &wmi_sc->wlan_resource_config; @@ -4995,18 +5005,11 @@ static int ath11k_pull_reg_chan_list_update_ev(struct ath11k_base *ab, reg_info->phy_id = chan_list_event_hdr->phy_id; reg_info->ctry_code = chan_list_event_hdr->country_id; reg_info->reg_dmn_pair = chan_list_event_hdr->domain_code; - if (chan_list_event_hdr->status_code == WMI_REG_SET_CC_STATUS_PASS) - reg_info->status_code = REG_SET_CC_STATUS_PASS; - else if (chan_list_event_hdr->status_code == WMI_REG_CURRENT_ALPHA2_NOT_FOUND) - reg_info->status_code = REG_CURRENT_ALPHA2_NOT_FOUND; - else if (chan_list_event_hdr->status_code == WMI_REG_INIT_ALPHA2_NOT_FOUND) - reg_info->status_code = REG_INIT_ALPHA2_NOT_FOUND; - else if (chan_list_event_hdr->status_code == WMI_REG_SET_CC_CHANGE_NOT_ALLOWED) - reg_info->status_code = REG_SET_CC_CHANGE_NOT_ALLOWED; - else if (chan_list_event_hdr->status_code == WMI_REG_SET_CC_STATUS_NO_MEMORY) - reg_info->status_code = REG_SET_CC_STATUS_NO_MEMORY; - else if (chan_list_event_hdr->status_code == WMI_REG_SET_CC_STATUS_FAIL) - reg_info->status_code = REG_SET_CC_STATUS_FAIL; + + reg_info->status_code = + ath11k_wmi_cc_setting_code_to_reg(chan_list_event_hdr->status_code); + + reg_info->is_ext_reg_event = false; reg_info->min_bw_2ghz = chan_list_event_hdr->min_bw_2ghz; reg_info->max_bw_2ghz = chan_list_event_hdr->max_bw_2ghz; @@ -5060,6 +5063,372 @@ static int ath11k_pull_reg_chan_list_update_ev(struct ath11k_base *ab, return 0; } +static struct cur_reg_rule +*create_ext_reg_rules_from_wmi(u32 num_reg_rules, + struct 
wmi_regulatory_ext_rule *wmi_reg_rule) +{ + struct cur_reg_rule *reg_rule_ptr; + u32 count; + + reg_rule_ptr = kcalloc(num_reg_rules, sizeof(*reg_rule_ptr), GFP_ATOMIC); + + if (!reg_rule_ptr) + return NULL; + + for (count = 0; count < num_reg_rules; count++) { + reg_rule_ptr[count].start_freq = + u32_get_bits(wmi_reg_rule[count].freq_info, + REG_RULE_START_FREQ); + reg_rule_ptr[count].end_freq = + u32_get_bits(wmi_reg_rule[count].freq_info, + REG_RULE_END_FREQ); + reg_rule_ptr[count].max_bw = + u32_get_bits(wmi_reg_rule[count].bw_pwr_info, + REG_RULE_MAX_BW); + reg_rule_ptr[count].reg_power = + u32_get_bits(wmi_reg_rule[count].bw_pwr_info, + REG_RULE_REG_PWR); + reg_rule_ptr[count].ant_gain = + u32_get_bits(wmi_reg_rule[count].bw_pwr_info, + REG_RULE_ANT_GAIN); + reg_rule_ptr[count].flags = + u32_get_bits(wmi_reg_rule[count].flag_info, + REG_RULE_FLAGS); + reg_rule_ptr[count].psd_flag = + u32_get_bits(wmi_reg_rule[count].psd_power_info, + REG_RULE_PSD_INFO); + reg_rule_ptr[count].psd_eirp = + u32_get_bits(wmi_reg_rule[count].psd_power_info, + REG_RULE_PSD_EIRP); + } + + return reg_rule_ptr; +} + +static u8 +ath11k_invalid_5ghz_reg_ext_rules_from_wmi(u32 num_reg_rules, + const struct wmi_regulatory_ext_rule *rule) +{ + u8 num_invalid_5ghz_rules = 0; + u32 count, start_freq; + + for (count = 0; count < num_reg_rules; count++) { + start_freq = u32_get_bits(rule[count].freq_info, + REG_RULE_START_FREQ); + + if (start_freq >= ATH11K_MIN_6G_FREQ) + num_invalid_5ghz_rules++; + } + + return num_invalid_5ghz_rules; +} + +static int ath11k_pull_reg_chan_list_ext_update_ev(struct ath11k_base *ab, + struct sk_buff *skb, + struct cur_regulatory_info *reg_info) +{ + const void **tb; + const struct wmi_reg_chan_list_cc_ext_event *ext_chan_list_event_hdr; + struct wmi_regulatory_ext_rule *ext_wmi_reg_rule; + u32 num_2ghz_reg_rules, num_5ghz_reg_rules; + u32 num_6ghz_reg_rules_ap[WMI_REG_CURRENT_MAX_AP_TYPE]; + u32 num_6ghz_client[WMI_REG_CURRENT_MAX_AP_TYPE][WMI_REG_MAX_CLIENT_TYPE]; + u32 total_reg_rules = 0; + int ret, i, j, num_invalid_5ghz_ext_rules = 0; + + ath11k_dbg(ab, ATH11K_DBG_WMI, "processing regulatory ext channel list\n"); + + tb = ath11k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC); + if (IS_ERR(tb)) { + ret = PTR_ERR(tb); + ath11k_warn(ab, "failed to parse tlv: %d\n", ret); + return ret; + } + + ext_chan_list_event_hdr = tb[WMI_TAG_REG_CHAN_LIST_CC_EXT_EVENT]; + if (!ext_chan_list_event_hdr) { + ath11k_warn(ab, "failed to fetch reg chan list ext update ev\n"); + kfree(tb); + return -EPROTO; + } + + reg_info->num_2ghz_reg_rules = + ext_chan_list_event_hdr->num_2ghz_reg_rules; + reg_info->num_5ghz_reg_rules = + ext_chan_list_event_hdr->num_5ghz_reg_rules; + reg_info->num_6ghz_rules_ap[WMI_REG_INDOOR_AP] = + ext_chan_list_event_hdr->num_6ghz_reg_rules_ap_lpi; + reg_info->num_6ghz_rules_ap[WMI_REG_STANDARD_POWER_AP] = + ext_chan_list_event_hdr->num_6ghz_reg_rules_ap_sp; + reg_info->num_6ghz_rules_ap[WMI_REG_VERY_LOW_POWER_AP] = + ext_chan_list_event_hdr->num_6ghz_reg_rules_ap_vlp; + + for (i = 0; i < WMI_REG_MAX_CLIENT_TYPE; i++) { + reg_info->num_6ghz_rules_client[WMI_REG_INDOOR_AP][i] = + ext_chan_list_event_hdr->num_6ghz_reg_rules_client_lpi[i]; + reg_info->num_6ghz_rules_client[WMI_REG_STANDARD_POWER_AP][i] = + ext_chan_list_event_hdr->num_6ghz_reg_rules_client_sp[i]; + reg_info->num_6ghz_rules_client[WMI_REG_VERY_LOW_POWER_AP][i] = + ext_chan_list_event_hdr->num_6ghz_reg_rules_client_vlp[i]; + } + + num_2ghz_reg_rules = reg_info->num_2ghz_reg_rules; + num_5ghz_reg_rules = 
reg_info->num_5ghz_reg_rules; + + total_reg_rules += num_2ghz_reg_rules; + total_reg_rules += num_5ghz_reg_rules; + + if ((num_2ghz_reg_rules > MAX_REG_RULES) || + (num_5ghz_reg_rules > MAX_REG_RULES)) { + ath11k_warn(ab, "Num reg rules for 2.4 GHz/5 GHz exceeds max limit (num_2ghz_reg_rules: %d num_5ghz_reg_rules: %d max_rules: %d)\n", + num_2ghz_reg_rules, num_5ghz_reg_rules, MAX_REG_RULES); + kfree(tb); + return -EINVAL; + } + + for (i = 0; i < WMI_REG_CURRENT_MAX_AP_TYPE; i++) { + num_6ghz_reg_rules_ap[i] = reg_info->num_6ghz_rules_ap[i]; + + if (num_6ghz_reg_rules_ap[i] > MAX_6GHZ_REG_RULES) { + ath11k_warn(ab, "Num 6 GHz reg rules for AP mode(%d) exceeds max limit (num_6ghz_reg_rules_ap: %d, max_rules: %d)\n", + i, num_6ghz_reg_rules_ap[i], MAX_6GHZ_REG_RULES); + kfree(tb); + return -EINVAL; + } + + total_reg_rules += num_6ghz_reg_rules_ap[i]; + } + + for (i = 0; i < WMI_REG_MAX_CLIENT_TYPE; i++) { + num_6ghz_client[WMI_REG_INDOOR_AP][i] = + reg_info->num_6ghz_rules_client[WMI_REG_INDOOR_AP][i]; + total_reg_rules += num_6ghz_client[WMI_REG_INDOOR_AP][i]; + + num_6ghz_client[WMI_REG_STANDARD_POWER_AP][i] = + reg_info->num_6ghz_rules_client[WMI_REG_STANDARD_POWER_AP][i]; + total_reg_rules += num_6ghz_client[WMI_REG_STANDARD_POWER_AP][i]; + + num_6ghz_client[WMI_REG_VERY_LOW_POWER_AP][i] = + reg_info->num_6ghz_rules_client[WMI_REG_VERY_LOW_POWER_AP][i]; + total_reg_rules += num_6ghz_client[WMI_REG_VERY_LOW_POWER_AP][i]; + + if ((num_6ghz_client[WMI_REG_INDOOR_AP][i] > MAX_6GHZ_REG_RULES) || + (num_6ghz_client[WMI_REG_STANDARD_POWER_AP][i] > + MAX_6GHZ_REG_RULES) || + (num_6ghz_client[WMI_REG_VERY_LOW_POWER_AP][i] > + MAX_6GHZ_REG_RULES)) { + ath11k_warn(ab, + "Num 6 GHz client reg rules exceeds max limit, for client(type: %d)\n", + i); + kfree(tb); + return -EINVAL; + } + } + + if (!total_reg_rules) { + ath11k_warn(ab, "No reg rules available\n"); + kfree(tb); + return -EINVAL; + } + + memcpy(reg_info->alpha2, &ext_chan_list_event_hdr->alpha2, + REG_ALPHA2_LEN); + + reg_info->dfs_region = ext_chan_list_event_hdr->dfs_region; + reg_info->phybitmap = ext_chan_list_event_hdr->phybitmap; + reg_info->num_phy = ext_chan_list_event_hdr->num_phy; + reg_info->phy_id = ext_chan_list_event_hdr->phy_id; + reg_info->ctry_code = ext_chan_list_event_hdr->country_id; + reg_info->reg_dmn_pair = ext_chan_list_event_hdr->domain_code; + + reg_info->status_code = + ath11k_wmi_cc_setting_code_to_reg(ext_chan_list_event_hdr->status_code); + + reg_info->is_ext_reg_event = true; + + reg_info->min_bw_2ghz = ext_chan_list_event_hdr->min_bw_2ghz; + reg_info->max_bw_2ghz = ext_chan_list_event_hdr->max_bw_2ghz; + reg_info->min_bw_5ghz = ext_chan_list_event_hdr->min_bw_5ghz; + reg_info->max_bw_5ghz = ext_chan_list_event_hdr->max_bw_5ghz; + + reg_info->min_bw_6ghz_ap[WMI_REG_INDOOR_AP] = + ext_chan_list_event_hdr->min_bw_6ghz_ap_lpi; + reg_info->max_bw_6ghz_ap[WMI_REG_INDOOR_AP] = + ext_chan_list_event_hdr->max_bw_6ghz_ap_lpi; + reg_info->min_bw_6ghz_ap[WMI_REG_STANDARD_POWER_AP] = + ext_chan_list_event_hdr->min_bw_6ghz_ap_sp; + reg_info->max_bw_6ghz_ap[WMI_REG_STANDARD_POWER_AP] = + ext_chan_list_event_hdr->max_bw_6ghz_ap_sp; + reg_info->min_bw_6ghz_ap[WMI_REG_VERY_LOW_POWER_AP] = + ext_chan_list_event_hdr->min_bw_6ghz_ap_vlp; + reg_info->max_bw_6ghz_ap[WMI_REG_VERY_LOW_POWER_AP] = + ext_chan_list_event_hdr->max_bw_6ghz_ap_vlp; + + for (i = 0; i < WMI_REG_MAX_CLIENT_TYPE; i++) { + reg_info->min_bw_6ghz_client[WMI_REG_INDOOR_AP][i] = + ext_chan_list_event_hdr->min_bw_6ghz_client_lpi[i]; + 
reg_info->max_bw_6ghz_client[WMI_REG_INDOOR_AP][i] = + ext_chan_list_event_hdr->max_bw_6ghz_client_lpi[i]; + reg_info->min_bw_6ghz_client[WMI_REG_STANDARD_POWER_AP][i] = + ext_chan_list_event_hdr->min_bw_6ghz_client_sp[i]; + reg_info->max_bw_6ghz_client[WMI_REG_STANDARD_POWER_AP][i] = + ext_chan_list_event_hdr->max_bw_6ghz_client_sp[i]; + reg_info->min_bw_6ghz_client[WMI_REG_VERY_LOW_POWER_AP][i] = + ext_chan_list_event_hdr->min_bw_6ghz_client_vlp[i]; + reg_info->max_bw_6ghz_client[WMI_REG_VERY_LOW_POWER_AP][i] = + ext_chan_list_event_hdr->max_bw_6ghz_client_vlp[i]; + } + + ath11k_dbg(ab, ATH11K_DBG_WMI, + "%s:cc_ext %s dsf %d BW: min_2ghz %d max_2ghz %d min_5ghz %d max_5ghz %d", + __func__, reg_info->alpha2, reg_info->dfs_region, + reg_info->min_bw_2ghz, reg_info->max_bw_2ghz, + reg_info->min_bw_5ghz, reg_info->max_bw_5ghz); + + ath11k_dbg(ab, ATH11K_DBG_WMI, + "num_2ghz_reg_rules %d num_5ghz_reg_rules %d", + num_2ghz_reg_rules, num_5ghz_reg_rules); + + ath11k_dbg(ab, ATH11K_DBG_WMI, + "num_6ghz_reg_rules_ap_lpi: %d num_6ghz_reg_rules_ap_sp: %d num_6ghz_reg_rules_ap_vlp: %d", + num_6ghz_reg_rules_ap[WMI_REG_INDOOR_AP], + num_6ghz_reg_rules_ap[WMI_REG_STANDARD_POWER_AP], + num_6ghz_reg_rules_ap[WMI_REG_VERY_LOW_POWER_AP]); + + j = WMI_REG_DEFAULT_CLIENT; + ath11k_dbg(ab, ATH11K_DBG_WMI, + "6 GHz Regular client: num_6ghz_reg_rules_lpi: %d num_6ghz_reg_rules_sp: %d num_6ghz_reg_rules_vlp: %d", + num_6ghz_client[WMI_REG_INDOOR_AP][j], + num_6ghz_client[WMI_REG_STANDARD_POWER_AP][j], + num_6ghz_client[WMI_REG_VERY_LOW_POWER_AP][j]); + + j = WMI_REG_SUBORDINATE_CLIENT; + ath11k_dbg(ab, ATH11K_DBG_WMI, + "6 GHz Subordinate client: num_6ghz_reg_rules_lpi: %d num_6ghz_reg_rules_sp: %d num_6ghz_reg_rules_vlp: %d", + num_6ghz_client[WMI_REG_INDOOR_AP][j], + num_6ghz_client[WMI_REG_STANDARD_POWER_AP][j], + num_6ghz_client[WMI_REG_VERY_LOW_POWER_AP][j]); + + ext_wmi_reg_rule = + (struct wmi_regulatory_ext_rule *)((u8 *)ext_chan_list_event_hdr + + sizeof(*ext_chan_list_event_hdr) + + sizeof(struct wmi_tlv)); + if (num_2ghz_reg_rules) { + reg_info->reg_rules_2ghz_ptr = + create_ext_reg_rules_from_wmi(num_2ghz_reg_rules, + ext_wmi_reg_rule); + + if (!reg_info->reg_rules_2ghz_ptr) { + kfree(tb); + ath11k_warn(ab, "Unable to Allocate memory for 2 GHz rules\n"); + return -ENOMEM; + } + } + + ext_wmi_reg_rule += num_2ghz_reg_rules; + + /* Firmware might include 6 GHz reg rule in 5 GHz rule list + * for few countries along with separate 6 GHz rule. + * Having same 6 GHz reg rule in 5 GHz and 6 GHz rules list + * causes intersect check to be true, and same rules will be + * shown multiple times in iw cmd. + * Hence, avoid parsing 6 GHz rule from 5 GHz reg rule list + */ + num_invalid_5ghz_ext_rules = + ath11k_invalid_5ghz_reg_ext_rules_from_wmi(num_5ghz_reg_rules, + ext_wmi_reg_rule); + + if (num_invalid_5ghz_ext_rules) { + ath11k_dbg(ab, ATH11K_DBG_WMI, + "CC: %s 5 GHz reg rules number %d from fw, %d number of invalid 5 GHz rules", + reg_info->alpha2, reg_info->num_5ghz_reg_rules, + num_invalid_5ghz_ext_rules); + + num_5ghz_reg_rules = num_5ghz_reg_rules - num_invalid_5ghz_ext_rules; + reg_info->num_5ghz_reg_rules = num_5ghz_reg_rules; + } + + if (num_5ghz_reg_rules) { + reg_info->reg_rules_5ghz_ptr = + create_ext_reg_rules_from_wmi(num_5ghz_reg_rules, + ext_wmi_reg_rule); + + if (!reg_info->reg_rules_5ghz_ptr) { + kfree(tb); + ath11k_warn(ab, "Unable to Allocate memory for 5 GHz rules\n"); + return -ENOMEM; + } + } + + /* We have adjusted the number of 5 GHz reg rules above. 
But still those + * many rules needs to be adjusted in ext_wmi_reg_rule. + * + * NOTE: num_invalid_5ghz_ext_rules will be 0 for rest other cases. + */ + ext_wmi_reg_rule += (num_5ghz_reg_rules + num_invalid_5ghz_ext_rules); + + for (i = 0; i < WMI_REG_CURRENT_MAX_AP_TYPE; i++) { + reg_info->reg_rules_6ghz_ap_ptr[i] = + create_ext_reg_rules_from_wmi(num_6ghz_reg_rules_ap[i], + ext_wmi_reg_rule); + + if (!reg_info->reg_rules_6ghz_ap_ptr[i]) { + kfree(tb); + ath11k_warn(ab, "Unable to Allocate memory for 6 GHz AP rules\n"); + return -ENOMEM; + } + + ext_wmi_reg_rule += num_6ghz_reg_rules_ap[i]; + } + + for (j = 0; j < WMI_REG_CURRENT_MAX_AP_TYPE; j++) { + for (i = 0; i < WMI_REG_MAX_CLIENT_TYPE; i++) { + reg_info->reg_rules_6ghz_client_ptr[j][i] = + create_ext_reg_rules_from_wmi(num_6ghz_client[j][i], + ext_wmi_reg_rule); + + if (!reg_info->reg_rules_6ghz_client_ptr[j][i]) { + kfree(tb); + ath11k_warn(ab, "Unable to Allocate memory for 6 GHz client rules\n"); + return -ENOMEM; + } + + ext_wmi_reg_rule += num_6ghz_client[j][i]; + } + } + + reg_info->client_type = ext_chan_list_event_hdr->client_type; + reg_info->rnr_tpe_usable = ext_chan_list_event_hdr->rnr_tpe_usable; + reg_info->unspecified_ap_usable = + ext_chan_list_event_hdr->unspecified_ap_usable; + reg_info->domain_code_6ghz_ap[WMI_REG_INDOOR_AP] = + ext_chan_list_event_hdr->domain_code_6ghz_ap_lpi; + reg_info->domain_code_6ghz_ap[WMI_REG_STANDARD_POWER_AP] = + ext_chan_list_event_hdr->domain_code_6ghz_ap_sp; + reg_info->domain_code_6ghz_ap[WMI_REG_VERY_LOW_POWER_AP] = + ext_chan_list_event_hdr->domain_code_6ghz_ap_vlp; + + for (i = 0; i < WMI_REG_MAX_CLIENT_TYPE; i++) { + reg_info->domain_code_6ghz_client[WMI_REG_INDOOR_AP][i] = + ext_chan_list_event_hdr->domain_code_6ghz_client_lpi[i]; + reg_info->domain_code_6ghz_client[WMI_REG_STANDARD_POWER_AP][i] = + ext_chan_list_event_hdr->domain_code_6ghz_client_sp[i]; + reg_info->domain_code_6ghz_client[WMI_REG_VERY_LOW_POWER_AP][i] = + ext_chan_list_event_hdr->domain_code_6ghz_client_vlp[i]; + } + + reg_info->domain_code_6ghz_super_id = + ext_chan_list_event_hdr->domain_code_6ghz_super_id; + + ath11k_dbg(ab, ATH11K_DBG_WMI, "6 GHz client_type: %d domain_code_6ghz_super_id: %d", + reg_info->client_type, reg_info->domain_code_6ghz_super_id); + + ath11k_dbg(ab, ATH11K_DBG_WMI, "processed regulatory ext channel list\n"); + + kfree(tb); + return 0; +} + static int ath11k_pull_peer_del_resp_ev(struct ath11k_base *ab, struct sk_buff *skb, struct wmi_peer_delete_resp_event *peer_del_resp) { @@ -6507,12 +6876,14 @@ static bool ath11k_reg_is_world_alpha(char *alpha) return false; } -static int ath11k_reg_chan_list_event(struct ath11k_base *ab, struct sk_buff *skb) +static int ath11k_reg_chan_list_event(struct ath11k_base *ab, + struct sk_buff *skb, + enum wmi_reg_chan_list_cmd_type id) { struct cur_regulatory_info *reg_info = NULL; struct ieee80211_regdomain *regd = NULL; bool intersect = false; - int ret = 0, pdev_idx; + int ret = 0, pdev_idx, i, j; struct ath11k *ar; reg_info = kzalloc(sizeof(*reg_info), GFP_ATOMIC); @@ -6521,7 +6892,11 @@ static int ath11k_reg_chan_list_event(struct ath11k_base *ab, struct sk_buff *sk goto fallback; } - ret = ath11k_pull_reg_chan_list_update_ev(ab, skb, reg_info); + if (id == WMI_REG_CHAN_LIST_CC_ID) + ret = ath11k_pull_reg_chan_list_update_ev(ab, skb, reg_info); + else + ret = ath11k_pull_reg_chan_list_ext_update_ev(ab, skb, reg_info); + if (ret) { ath11k_warn(ab, "failed to extract regulatory info from received event\n"); goto fallback; @@ -6623,6 +6998,14 @@ 
mem_free: if (reg_info) { kfree(reg_info->reg_rules_2ghz_ptr); kfree(reg_info->reg_rules_5ghz_ptr); + if (reg_info->is_ext_reg_event) { + for (i = 0; i < WMI_REG_CURRENT_MAX_AP_TYPE; i++) + kfree(reg_info->reg_rules_6ghz_ap_ptr[i]); + + for (j = 0; j < WMI_REG_CURRENT_MAX_AP_TYPE; j++) + for (i = 0; i < WMI_REG_MAX_CLIENT_TYPE; i++) + kfree(reg_info->reg_rules_6ghz_client_ptr[j][i]); + } kfree(reg_info); } return ret; @@ -8054,7 +8437,10 @@ static void ath11k_wmi_tlv_op_rx(struct ath11k_base *ab, struct sk_buff *skb) ath11k_service_ready_ext2_event(ab, skb); break; case WMI_REG_CHAN_LIST_CC_EVENTID: - ath11k_reg_chan_list_event(ab, skb); + ath11k_reg_chan_list_event(ab, skb, WMI_REG_CHAN_LIST_CC_ID); + break; + case WMI_REG_CHAN_LIST_CC_EXT_EVENTID: + ath11k_reg_chan_list_event(ab, skb, WMI_REG_CHAN_LIST_CC_EXT_ID); break; case WMI_READY_EVENTID: ath11k_ready_event(ab, skb); diff --git a/drivers/net/wireless/ath/ath11k/wmi.h b/drivers/net/wireless/ath/ath11k/wmi.h index 858403f190c2b..3ec3fa0aa5c87 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.h +++ b/drivers/net/wireless/ath/ath11k/wmi.h @@ -797,6 +797,7 @@ enum wmi_tlv_event_id { WMI_RMC_NEW_LEADER_EVENTID = WMI_TLV_CMD(WMI_GRP_RMC), WMI_REG_CHAN_LIST_CC_EVENTID = WMI_TLV_CMD(WMI_GRP_REGULATORY), WMI_11D_NEW_COUNTRY_EVENTID, + WMI_REG_CHAN_LIST_CC_EXT_EVENTID, WMI_NDI_CAP_RSP_EVENTID = WMI_TLV_CMD(WMI_GRP_PROTOTYPE), WMI_NDP_INITIATOR_RSP_EVENTID, WMI_NDP_RESPONDER_RSP_EVENTID, @@ -1865,6 +1866,8 @@ enum wmi_tlv_tag { WMI_TAG_PDEV_SRG_OBSS_BSSID_ENABLE_BITMAP_CMD, WMI_TAG_PDEV_NON_SRG_OBSS_COLOR_ENABLE_BITMAP_CMD, WMI_TAG_PDEV_NON_SRG_OBSS_BSSID_ENABLE_BITMAP_CMD, + WMI_TAG_REGULATORY_RULE_EXT_STRUCT = 0x3A9, + WMI_TAG_REG_CHAN_LIST_CC_EXT_EVENT, WMI_TAG_PDEV_SET_BIOS_SAR_TABLE_CMD = 0x3D8, WMI_TAG_PDEV_SET_BIOS_GEO_TABLE_CMD, WMI_TAG_MAX @@ -2097,6 +2100,7 @@ enum wmi_tlv_service { /* The second 128 bits */ WMI_MAX_EXT_SERVICE = 256, + WMI_TLV_SERVICE_REG_CC_EXT_EVENT_SUPPORT = 281, WMI_TLV_SERVICE_BIOS_SAR_SUPPORT = 326, /* The third 128 bits */ @@ -2313,6 +2317,8 @@ struct wmi_init_cmd { #define WMI_RSRC_CFG_FLAG1_BSS_CHANNEL_INFO_64 BIT(5) #define WMI_RSRC_CFG_FLAG1_ACK_RSSI BIT(18) +#define WMI_CFG_HOST_SERVICE_FLAG_REG_CC_EXT 4 + struct wmi_resource_config { u32 tlv_header; u32 num_vdevs; @@ -2372,6 +2378,15 @@ struct wmi_resource_config { u32 sched_params; u32 twt_ap_pdev_count; u32 twt_ap_sta_count; + u32 max_nlo_ssids; + u32 num_pkt_filters; + u32 num_max_sta_vdevs; + u32 max_bssid_indicator; + u32 ul_resp_config; + u32 msdu_flow_override_config0; + u32 msdu_flow_override_config1; + u32 flags2; + u32 host_service_flags; } __packed; struct wmi_service_ready_event { @@ -2854,6 +2869,8 @@ struct rx_reorder_queue_remove_params { #define REG_RULE_MAX_BW 0x0000ffff #define REG_RULE_REG_PWR 0x00ff0000 #define REG_RULE_ANT_GAIN 0xff000000 +#define REG_RULE_PSD_INFO BIT(0) +#define REG_RULE_PSD_EIRP 0xff0000 #define WMI_VDEV_PARAM_TXBF_SU_TX_BFEE BIT(0) #define WMI_VDEV_PARAM_TXBF_MU_TX_BFEE BIT(1) @@ -4049,6 +4066,7 @@ struct wmi_he_rate_set { #define MAX_REG_RULES 10 #define REG_ALPHA2_LEN 2 +#define MAX_6GHZ_REG_RULES 5 enum wmi_start_event_param { WMI_VDEV_START_RESP_EVENT = 0, @@ -4079,16 +4097,6 @@ enum wmi_vdev_start_resp_status_code { WMI_VDEV_START_RESPONSE_INVALID_REGDOMAIN = 4, }; -; -enum cc_setting_code { - REG_SET_CC_STATUS_PASS = 0, - REG_CURRENT_ALPHA2_NOT_FOUND = 1, - REG_INIT_ALPHA2_NOT_FOUND = 2, - REG_SET_CC_CHANGE_NOT_ALLOWED = 3, - REG_SET_CC_STATUS_NO_MEMORY = 4, - REG_SET_CC_STATUS_FAIL = 5, -}; - /* 
Regaulatory Rule Flags Passed by FW */ #define REGULATORY_CHAN_DISABLED BIT(0) #define REGULATORY_CHAN_NO_IR BIT(1) @@ -4102,13 +4110,72 @@ enum cc_setting_code { #define REGULATORY_CHAN_NO_20MHZ BIT(11) #define REGULATORY_CHAN_NO_10MHZ BIT(12) -enum { +enum wmi_reg_chan_list_cmd_type { + WMI_REG_CHAN_LIST_CC_ID = 0, + WMI_REG_CHAN_LIST_CC_EXT_ID = 1, +}; + +enum wmi_reg_cc_setting_code { WMI_REG_SET_CC_STATUS_PASS = 0, WMI_REG_CURRENT_ALPHA2_NOT_FOUND = 1, WMI_REG_INIT_ALPHA2_NOT_FOUND = 2, WMI_REG_SET_CC_CHANGE_NOT_ALLOWED = 3, WMI_REG_SET_CC_STATUS_NO_MEMORY = 4, WMI_REG_SET_CC_STATUS_FAIL = 5, + + /* add new setting code above, update in + * @enum cc_setting_code as well. + * Also handle it in ath11k_wmi_cc_setting_code_to_reg() + */ +}; + +enum cc_setting_code { + REG_SET_CC_STATUS_PASS = 0, + REG_CURRENT_ALPHA2_NOT_FOUND = 1, + REG_INIT_ALPHA2_NOT_FOUND = 2, + REG_SET_CC_CHANGE_NOT_ALLOWED = 3, + REG_SET_CC_STATUS_NO_MEMORY = 4, + REG_SET_CC_STATUS_FAIL = 5, + + /* add new setting code above, update in + * @enum wmi_reg_cc_setting_code as well. + */ +}; + +static inline enum cc_setting_code +ath11k_wmi_cc_setting_code_to_reg(enum wmi_reg_cc_setting_code status_code) +{ + switch (status_code) { + case WMI_REG_SET_CC_STATUS_PASS: + return REG_SET_CC_STATUS_PASS; + case WMI_REG_CURRENT_ALPHA2_NOT_FOUND: + return REG_CURRENT_ALPHA2_NOT_FOUND; + case WMI_REG_INIT_ALPHA2_NOT_FOUND: + return REG_INIT_ALPHA2_NOT_FOUND; + case WMI_REG_SET_CC_CHANGE_NOT_ALLOWED: + return REG_SET_CC_CHANGE_NOT_ALLOWED; + case WMI_REG_SET_CC_STATUS_NO_MEMORY: + return REG_SET_CC_STATUS_NO_MEMORY; + case WMI_REG_SET_CC_STATUS_FAIL: + return REG_SET_CC_STATUS_FAIL; + } + + return REG_SET_CC_STATUS_FAIL; +} + +enum wmi_reg_6ghz_ap_type { + WMI_REG_INDOOR_AP = 0, + WMI_REG_STANDARD_POWER_AP = 1, + WMI_REG_VERY_LOW_POWER_AP = 2, + + WMI_REG_CURRENT_MAX_AP_TYPE, + WMI_REG_MAX_AP_TYPE = 7, +}; + +enum wmi_reg_6ghz_client_type { + WMI_REG_DEFAULT_CLIENT = 0, + WMI_REG_SUBORDINATE_CLIENT = 1, + WMI_REG_MAX_CLIENT_TYPE = 2, }; struct cur_reg_rule { @@ -4118,6 +4185,8 @@ struct cur_reg_rule { u8 reg_power; u8 ant_gain; u16 flags; + bool psd_flag; + s8 psd_eirp; }; struct cur_regulatory_info { @@ -4137,6 +4206,22 @@ struct cur_regulatory_info { u32 num_5ghz_reg_rules; struct cur_reg_rule *reg_rules_2ghz_ptr; struct cur_reg_rule *reg_rules_5ghz_ptr; + bool is_ext_reg_event; + enum wmi_reg_6ghz_client_type client_type; + bool rnr_tpe_usable; + bool unspecified_ap_usable; + u8 domain_code_6ghz_ap[WMI_REG_CURRENT_MAX_AP_TYPE]; + u8 domain_code_6ghz_client[WMI_REG_CURRENT_MAX_AP_TYPE][WMI_REG_MAX_CLIENT_TYPE]; + u32 domain_code_6ghz_super_id; + u32 min_bw_6ghz_ap[WMI_REG_CURRENT_MAX_AP_TYPE]; + u32 max_bw_6ghz_ap[WMI_REG_CURRENT_MAX_AP_TYPE]; + u32 min_bw_6ghz_client[WMI_REG_CURRENT_MAX_AP_TYPE][WMI_REG_MAX_CLIENT_TYPE]; + u32 max_bw_6ghz_client[WMI_REG_CURRENT_MAX_AP_TYPE][WMI_REG_MAX_CLIENT_TYPE]; + u32 num_6ghz_rules_ap[WMI_REG_CURRENT_MAX_AP_TYPE]; + u32 num_6ghz_rules_client[WMI_REG_CURRENT_MAX_AP_TYPE][WMI_REG_MAX_CLIENT_TYPE]; + struct cur_reg_rule *reg_rules_6ghz_ap_ptr[WMI_REG_CURRENT_MAX_AP_TYPE]; + struct cur_reg_rule *reg_rules_6ghz_client_ptr + [WMI_REG_CURRENT_MAX_AP_TYPE][WMI_REG_MAX_CLIENT_TYPE]; }; struct wmi_reg_chan_list_cc_event { @@ -4163,6 +4248,61 @@ struct wmi_regulatory_rule_struct { u32 flag_info; }; +#define WMI_REG_CLIENT_MAX 4 + +struct wmi_reg_chan_list_cc_ext_event { + u32 status_code; + u32 phy_id; + u32 alpha2; + u32 num_phy; + u32 country_id; + u32 domain_code; + u32 dfs_region; + u32 phybitmap; 
+ u32 min_bw_2ghz; + u32 max_bw_2ghz; + u32 min_bw_5ghz; + u32 max_bw_5ghz; + u32 num_2ghz_reg_rules; + u32 num_5ghz_reg_rules; + u32 client_type; + u32 rnr_tpe_usable; + u32 unspecified_ap_usable; + u32 domain_code_6ghz_ap_lpi; + u32 domain_code_6ghz_ap_sp; + u32 domain_code_6ghz_ap_vlp; + u32 domain_code_6ghz_client_lpi[WMI_REG_CLIENT_MAX]; + u32 domain_code_6ghz_client_sp[WMI_REG_CLIENT_MAX]; + u32 domain_code_6ghz_client_vlp[WMI_REG_CLIENT_MAX]; + u32 domain_code_6ghz_super_id; + u32 min_bw_6ghz_ap_sp; + u32 max_bw_6ghz_ap_sp; + u32 min_bw_6ghz_ap_lpi; + u32 max_bw_6ghz_ap_lpi; + u32 min_bw_6ghz_ap_vlp; + u32 max_bw_6ghz_ap_vlp; + u32 min_bw_6ghz_client_sp[WMI_REG_CLIENT_MAX]; + u32 max_bw_6ghz_client_sp[WMI_REG_CLIENT_MAX]; + u32 min_bw_6ghz_client_lpi[WMI_REG_CLIENT_MAX]; + u32 max_bw_6ghz_client_lpi[WMI_REG_CLIENT_MAX]; + u32 min_bw_6ghz_client_vlp[WMI_REG_CLIENT_MAX]; + u32 max_bw_6ghz_client_vlp[WMI_REG_CLIENT_MAX]; + u32 num_6ghz_reg_rules_ap_sp; + u32 num_6ghz_reg_rules_ap_lpi; + u32 num_6ghz_reg_rules_ap_vlp; + u32 num_6ghz_reg_rules_client_sp[WMI_REG_CLIENT_MAX]; + u32 num_6ghz_reg_rules_client_lpi[WMI_REG_CLIENT_MAX]; + u32 num_6ghz_reg_rules_client_vlp[WMI_REG_CLIENT_MAX]; +} __packed; + +struct wmi_regulatory_ext_rule { + u32 tlv_header; + u32 freq_info; + u32 bw_pwr_info; + u32 flag_info; + u32 psd_power_info; +} __packed; + struct wmi_vdev_delete_resp_event { u32 vdev_id; } __packed; @@ -5358,6 +5498,7 @@ struct target_resource_config { u32 sched_params; u32 twt_ap_pdev_count; u32 twt_ap_sta_count; + u8 is_reg_cc_ext_event_supported; }; enum wmi_debug_log_param { -- GitLab From e238e62ba8868a784e485eb94451c87cd1b85cee Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Wed, 1 Mar 2023 16:20:59 +0200 Subject: [PATCH 0363/2078] wifi: ath11k: add debug prints in regulatory WMI event processing Add some more debug prints in processing regulatory WMI event in order to increase more debuggability. Tested-on: IPQ8074 hw2.0 AHB WLAN.HK.2.7.0.1-01744-QCAHKSWPL_SILICONZ-1 Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.7.0.1-01744-QCAHKSWPL_SILICONZ-1 Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.5.0.1-01100-QCAHKSWPL_SILICONZ-1 Signed-off-by: Aditya Kumar Singh Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230110121024.14051-4-quic_adisi@quicinc.com --- drivers/net/wireless/ath/ath11k/reg.c | 2 +- drivers/net/wireless/ath/ath11k/wmi.c | 207 ++++++++++++++++++-------- drivers/net/wireless/ath/ath11k/wmi.h | 142 ++++++++++++++++++ 3 files changed, 291 insertions(+), 60 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/reg.c b/drivers/net/wireless/ath/ath11k/reg.c index 08621bd13a125..67443457f4daa 100644 --- a/drivers/net/wireless/ath/ath11k/reg.c +++ b/drivers/net/wireless/ath/ath11k/reg.c @@ -646,7 +646,7 @@ ath11k_reg_build_regd(struct ath11k_base *ab, tmp_regd->dfs_region = ath11k_map_fw_dfs_region(reg_info->dfs_region); ath11k_dbg(ab, ATH11K_DBG_REG, - "\r\nCountry %s, CFG Regdomain %s FW Regdomain %d, num_reg_rules %d\n", + "Country %s, CFG Regdomain %s FW Regdomain %d, num_reg_rules %d\n", alpha2, ath11k_reg_get_regdom_str(tmp_regd->dfs_region), reg_info->dfs_region, num_rules); /* Update reg_rules[] below. 
Firmware is expected to diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c index 34731d0d1efca..27f3fceb33c58 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -4925,6 +4925,26 @@ static int ath11k_pull_vdev_start_resp_tlv(struct ath11k_base *ab, struct sk_buf return 0; } +static void ath11k_print_reg_rule(struct ath11k_base *ab, const char *band, + u32 num_reg_rules, + struct cur_reg_rule *reg_rule_ptr) +{ + struct cur_reg_rule *reg_rule = reg_rule_ptr; + u32 count; + + ath11k_dbg(ab, ATH11K_DBG_WMI, "number of reg rules in %s band: %d\n", + band, num_reg_rules); + + for (count = 0; count < num_reg_rules; count++) { + ath11k_dbg(ab, ATH11K_DBG_WMI, + "reg rule %d: (%d - %d @ %d) (%d, %d) (FLAGS %d)\n", + count + 1, reg_rule->start_freq, reg_rule->end_freq, + reg_rule->max_bw, reg_rule->ant_gain, + reg_rule->reg_power, reg_rule->flags); + reg_rule++; + } +} + static struct cur_reg_rule *create_reg_rules_from_wmi(u32 num_reg_rules, struct wmi_regulatory_rule_struct *wmi_reg_rule) @@ -5006,6 +5026,10 @@ static int ath11k_pull_reg_chan_list_update_ev(struct ath11k_base *ab, reg_info->ctry_code = chan_list_event_hdr->country_id; reg_info->reg_dmn_pair = chan_list_event_hdr->domain_code; + ath11k_dbg(ab, ATH11K_DBG_WMI, + "status_code %s", + ath11k_cc_status_to_str(reg_info->status_code)); + reg_info->status_code = ath11k_wmi_cc_setting_code_to_reg(chan_list_event_hdr->status_code); @@ -5020,13 +5044,13 @@ static int ath11k_pull_reg_chan_list_update_ev(struct ath11k_base *ab, num_5ghz_reg_rules = reg_info->num_5ghz_reg_rules; ath11k_dbg(ab, ATH11K_DBG_WMI, - "%s:cc %s dsf %d BW: min_2ghz %d max_2ghz %d min_5ghz %d max_5ghz %d", - __func__, reg_info->alpha2, reg_info->dfs_region, + "cc %s dsf %d BW: min_2ghz %d max_2ghz %d min_5ghz %d max_5ghz %d", + reg_info->alpha2, reg_info->dfs_region, reg_info->min_bw_2ghz, reg_info->max_bw_2ghz, reg_info->min_bw_5ghz, reg_info->max_bw_5ghz); ath11k_dbg(ab, ATH11K_DBG_WMI, - "%s: num_2ghz_reg_rules %d num_5ghz_reg_rules %d", __func__, + "num_2ghz_reg_rules %d num_5ghz_reg_rules %d", num_2ghz_reg_rules, num_5ghz_reg_rules); wmi_reg_rule = @@ -5043,6 +5067,10 @@ static int ath11k_pull_reg_chan_list_update_ev(struct ath11k_base *ab, ath11k_warn(ab, "Unable to Allocate memory for 2 GHz rules\n"); return -ENOMEM; } + + ath11k_print_reg_rule(ab, "2 GHz", + num_2ghz_reg_rules, + reg_info->reg_rules_2ghz_ptr); } if (num_5ghz_reg_rules) { @@ -5055,6 +5083,10 @@ static int ath11k_pull_reg_chan_list_update_ev(struct ath11k_base *ab, ath11k_warn(ab, "Unable to Allocate memory for 5 GHz rules\n"); return -ENOMEM; } + + ath11k_print_reg_rule(ab, "5 GHz", + num_5ghz_reg_rules, + reg_info->reg_rules_5ghz_ptr); } ath11k_dbg(ab, ATH11K_DBG_WMI, "processed regulatory channel list\n"); @@ -5128,7 +5160,7 @@ static int ath11k_pull_reg_chan_list_ext_update_ev(struct ath11k_base *ab, struct cur_regulatory_info *reg_info) { const void **tb; - const struct wmi_reg_chan_list_cc_ext_event *ext_chan_list_event_hdr; + const struct wmi_reg_chan_list_cc_ext_event *ev; struct wmi_regulatory_ext_rule *ext_wmi_reg_rule; u32 num_2ghz_reg_rules, num_5ghz_reg_rules; u32 num_6ghz_reg_rules_ap[WMI_REG_CURRENT_MAX_AP_TYPE]; @@ -5145,31 +5177,29 @@ static int ath11k_pull_reg_chan_list_ext_update_ev(struct ath11k_base *ab, return ret; } - ext_chan_list_event_hdr = tb[WMI_TAG_REG_CHAN_LIST_CC_EXT_EVENT]; - if (!ext_chan_list_event_hdr) { + ev = tb[WMI_TAG_REG_CHAN_LIST_CC_EXT_EVENT]; + if (!ev) { ath11k_warn(ab, 
"failed to fetch reg chan list ext update ev\n"); kfree(tb); return -EPROTO; } - reg_info->num_2ghz_reg_rules = - ext_chan_list_event_hdr->num_2ghz_reg_rules; - reg_info->num_5ghz_reg_rules = - ext_chan_list_event_hdr->num_5ghz_reg_rules; + reg_info->num_2ghz_reg_rules = ev->num_2ghz_reg_rules; + reg_info->num_5ghz_reg_rules = ev->num_5ghz_reg_rules; reg_info->num_6ghz_rules_ap[WMI_REG_INDOOR_AP] = - ext_chan_list_event_hdr->num_6ghz_reg_rules_ap_lpi; + ev->num_6ghz_reg_rules_ap_lpi; reg_info->num_6ghz_rules_ap[WMI_REG_STANDARD_POWER_AP] = - ext_chan_list_event_hdr->num_6ghz_reg_rules_ap_sp; + ev->num_6ghz_reg_rules_ap_sp; reg_info->num_6ghz_rules_ap[WMI_REG_VERY_LOW_POWER_AP] = - ext_chan_list_event_hdr->num_6ghz_reg_rules_ap_vlp; + ev->num_6ghz_reg_rules_ap_vlp; for (i = 0; i < WMI_REG_MAX_CLIENT_TYPE; i++) { reg_info->num_6ghz_rules_client[WMI_REG_INDOOR_AP][i] = - ext_chan_list_event_hdr->num_6ghz_reg_rules_client_lpi[i]; + ev->num_6ghz_reg_rules_client_lpi[i]; reg_info->num_6ghz_rules_client[WMI_REG_STANDARD_POWER_AP][i] = - ext_chan_list_event_hdr->num_6ghz_reg_rules_client_sp[i]; + ev->num_6ghz_reg_rules_client_sp[i]; reg_info->num_6ghz_rules_client[WMI_REG_VERY_LOW_POWER_AP][i] = - ext_chan_list_event_hdr->num_6ghz_reg_rules_client_vlp[i]; + ev->num_6ghz_reg_rules_client_vlp[i]; } num_2ghz_reg_rules = reg_info->num_2ghz_reg_rules; @@ -5231,57 +5261,79 @@ static int ath11k_pull_reg_chan_list_ext_update_ev(struct ath11k_base *ab, return -EINVAL; } - memcpy(reg_info->alpha2, &ext_chan_list_event_hdr->alpha2, - REG_ALPHA2_LEN); + memcpy(reg_info->alpha2, &ev->alpha2, REG_ALPHA2_LEN); - reg_info->dfs_region = ext_chan_list_event_hdr->dfs_region; - reg_info->phybitmap = ext_chan_list_event_hdr->phybitmap; - reg_info->num_phy = ext_chan_list_event_hdr->num_phy; - reg_info->phy_id = ext_chan_list_event_hdr->phy_id; - reg_info->ctry_code = ext_chan_list_event_hdr->country_id; - reg_info->reg_dmn_pair = ext_chan_list_event_hdr->domain_code; + reg_info->dfs_region = ev->dfs_region; + reg_info->phybitmap = ev->phybitmap; + reg_info->num_phy = ev->num_phy; + reg_info->phy_id = ev->phy_id; + reg_info->ctry_code = ev->country_id; + reg_info->reg_dmn_pair = ev->domain_code; + + ath11k_dbg(ab, ATH11K_DBG_WMI, + "status_code %s", + ath11k_cc_status_to_str(reg_info->status_code)); reg_info->status_code = - ath11k_wmi_cc_setting_code_to_reg(ext_chan_list_event_hdr->status_code); + ath11k_wmi_cc_setting_code_to_reg(ev->status_code); reg_info->is_ext_reg_event = true; - reg_info->min_bw_2ghz = ext_chan_list_event_hdr->min_bw_2ghz; - reg_info->max_bw_2ghz = ext_chan_list_event_hdr->max_bw_2ghz; - reg_info->min_bw_5ghz = ext_chan_list_event_hdr->min_bw_5ghz; - reg_info->max_bw_5ghz = ext_chan_list_event_hdr->max_bw_5ghz; + reg_info->min_bw_2ghz = ev->min_bw_2ghz; + reg_info->max_bw_2ghz = ev->max_bw_2ghz; + reg_info->min_bw_5ghz = ev->min_bw_5ghz; + reg_info->max_bw_5ghz = ev->max_bw_5ghz; reg_info->min_bw_6ghz_ap[WMI_REG_INDOOR_AP] = - ext_chan_list_event_hdr->min_bw_6ghz_ap_lpi; + ev->min_bw_6ghz_ap_lpi; reg_info->max_bw_6ghz_ap[WMI_REG_INDOOR_AP] = - ext_chan_list_event_hdr->max_bw_6ghz_ap_lpi; + ev->max_bw_6ghz_ap_lpi; reg_info->min_bw_6ghz_ap[WMI_REG_STANDARD_POWER_AP] = - ext_chan_list_event_hdr->min_bw_6ghz_ap_sp; + ev->min_bw_6ghz_ap_sp; reg_info->max_bw_6ghz_ap[WMI_REG_STANDARD_POWER_AP] = - ext_chan_list_event_hdr->max_bw_6ghz_ap_sp; + ev->max_bw_6ghz_ap_sp; reg_info->min_bw_6ghz_ap[WMI_REG_VERY_LOW_POWER_AP] = - ext_chan_list_event_hdr->min_bw_6ghz_ap_vlp; + ev->min_bw_6ghz_ap_vlp; 
reg_info->max_bw_6ghz_ap[WMI_REG_VERY_LOW_POWER_AP] = - ext_chan_list_event_hdr->max_bw_6ghz_ap_vlp; + ev->max_bw_6ghz_ap_vlp; + + ath11k_dbg(ab, ATH11K_DBG_WMI, + "6 GHz AP BW: LPI (%d - %d), SP (%d - %d), VLP (%d - %d)\n", + reg_info->min_bw_6ghz_ap[WMI_REG_INDOOR_AP], + reg_info->max_bw_6ghz_ap[WMI_REG_INDOOR_AP], + reg_info->min_bw_6ghz_ap[WMI_REG_STANDARD_POWER_AP], + reg_info->max_bw_6ghz_ap[WMI_REG_STANDARD_POWER_AP], + reg_info->min_bw_6ghz_ap[WMI_REG_VERY_LOW_POWER_AP], + reg_info->max_bw_6ghz_ap[WMI_REG_VERY_LOW_POWER_AP]); for (i = 0; i < WMI_REG_MAX_CLIENT_TYPE; i++) { reg_info->min_bw_6ghz_client[WMI_REG_INDOOR_AP][i] = - ext_chan_list_event_hdr->min_bw_6ghz_client_lpi[i]; + ev->min_bw_6ghz_client_lpi[i]; reg_info->max_bw_6ghz_client[WMI_REG_INDOOR_AP][i] = - ext_chan_list_event_hdr->max_bw_6ghz_client_lpi[i]; + ev->max_bw_6ghz_client_lpi[i]; reg_info->min_bw_6ghz_client[WMI_REG_STANDARD_POWER_AP][i] = - ext_chan_list_event_hdr->min_bw_6ghz_client_sp[i]; + ev->min_bw_6ghz_client_sp[i]; reg_info->max_bw_6ghz_client[WMI_REG_STANDARD_POWER_AP][i] = - ext_chan_list_event_hdr->max_bw_6ghz_client_sp[i]; + ev->max_bw_6ghz_client_sp[i]; reg_info->min_bw_6ghz_client[WMI_REG_VERY_LOW_POWER_AP][i] = - ext_chan_list_event_hdr->min_bw_6ghz_client_vlp[i]; + ev->min_bw_6ghz_client_vlp[i]; reg_info->max_bw_6ghz_client[WMI_REG_VERY_LOW_POWER_AP][i] = - ext_chan_list_event_hdr->max_bw_6ghz_client_vlp[i]; + ev->max_bw_6ghz_client_vlp[i]; + + ath11k_dbg(ab, ATH11K_DBG_WMI, + "6 GHz %s BW: LPI (%d - %d), SP (%d - %d), VLP (%d - %d)\n", + ath11k_6ghz_client_type_to_str(i), + reg_info->min_bw_6ghz_client[WMI_REG_INDOOR_AP][i], + reg_info->max_bw_6ghz_client[WMI_REG_INDOOR_AP][i], + reg_info->min_bw_6ghz_client[WMI_REG_STANDARD_POWER_AP][i], + reg_info->max_bw_6ghz_client[WMI_REG_STANDARD_POWER_AP][i], + reg_info->min_bw_6ghz_client[WMI_REG_VERY_LOW_POWER_AP][i], + reg_info->max_bw_6ghz_client[WMI_REG_VERY_LOW_POWER_AP][i]); } ath11k_dbg(ab, ATH11K_DBG_WMI, - "%s:cc_ext %s dsf %d BW: min_2ghz %d max_2ghz %d min_5ghz %d max_5ghz %d", - __func__, reg_info->alpha2, reg_info->dfs_region, + "cc_ext %s dsf %d BW: min_2ghz %d max_2ghz %d min_5ghz %d max_5ghz %d", + reg_info->alpha2, reg_info->dfs_region, reg_info->min_bw_2ghz, reg_info->max_bw_2ghz, reg_info->min_bw_5ghz, reg_info->max_bw_5ghz); @@ -5310,9 +5362,8 @@ static int ath11k_pull_reg_chan_list_ext_update_ev(struct ath11k_base *ab, num_6ghz_client[WMI_REG_VERY_LOW_POWER_AP][j]); ext_wmi_reg_rule = - (struct wmi_regulatory_ext_rule *)((u8 *)ext_chan_list_event_hdr - + sizeof(*ext_chan_list_event_hdr) - + sizeof(struct wmi_tlv)); + (struct wmi_regulatory_ext_rule *)((u8 *)ev + sizeof(*ev) + + sizeof(struct wmi_tlv)); if (num_2ghz_reg_rules) { reg_info->reg_rules_2ghz_ptr = create_ext_reg_rules_from_wmi(num_2ghz_reg_rules, @@ -5323,6 +5374,10 @@ static int ath11k_pull_reg_chan_list_ext_update_ev(struct ath11k_base *ab, ath11k_warn(ab, "Unable to Allocate memory for 2 GHz rules\n"); return -ENOMEM; } + + ath11k_print_reg_rule(ab, "2 GHz", + num_2ghz_reg_rules, + reg_info->reg_rules_2ghz_ptr); } ext_wmi_reg_rule += num_2ghz_reg_rules; @@ -5358,6 +5413,10 @@ static int ath11k_pull_reg_chan_list_ext_update_ev(struct ath11k_base *ab, ath11k_warn(ab, "Unable to Allocate memory for 5 GHz rules\n"); return -ENOMEM; } + + ath11k_print_reg_rule(ab, "5 GHz", + num_5ghz_reg_rules, + reg_info->reg_rules_5ghz_ptr); } /* We have adjusted the number of 5 GHz reg rules above. 
But still those @@ -5378,10 +5437,17 @@ static int ath11k_pull_reg_chan_list_ext_update_ev(struct ath11k_base *ab, return -ENOMEM; } + ath11k_print_reg_rule(ab, ath11k_6ghz_ap_type_to_str(i), + num_6ghz_reg_rules_ap[i], + reg_info->reg_rules_6ghz_ap_ptr[i]); + ext_wmi_reg_rule += num_6ghz_reg_rules_ap[i]; } for (j = 0; j < WMI_REG_CURRENT_MAX_AP_TYPE; j++) { + ath11k_dbg(ab, ATH11K_DBG_WMI, + "6 GHz AP type %s", ath11k_6ghz_ap_type_to_str(j)); + for (i = 0; i < WMI_REG_MAX_CLIENT_TYPE; i++) { reg_info->reg_rules_6ghz_client_ptr[j][i] = create_ext_reg_rules_from_wmi(num_6ghz_client[j][i], @@ -5393,35 +5459,58 @@ static int ath11k_pull_reg_chan_list_ext_update_ev(struct ath11k_base *ab, return -ENOMEM; } + ath11k_print_reg_rule(ab, + ath11k_6ghz_client_type_to_str(i), + num_6ghz_client[j][i], + reg_info->reg_rules_6ghz_client_ptr[j][i]); + ext_wmi_reg_rule += num_6ghz_client[j][i]; } } - reg_info->client_type = ext_chan_list_event_hdr->client_type; - reg_info->rnr_tpe_usable = ext_chan_list_event_hdr->rnr_tpe_usable; + reg_info->client_type = ev->client_type; + reg_info->rnr_tpe_usable = ev->rnr_tpe_usable; reg_info->unspecified_ap_usable = - ext_chan_list_event_hdr->unspecified_ap_usable; + ev->unspecified_ap_usable; reg_info->domain_code_6ghz_ap[WMI_REG_INDOOR_AP] = - ext_chan_list_event_hdr->domain_code_6ghz_ap_lpi; + ev->domain_code_6ghz_ap_lpi; reg_info->domain_code_6ghz_ap[WMI_REG_STANDARD_POWER_AP] = - ext_chan_list_event_hdr->domain_code_6ghz_ap_sp; + ev->domain_code_6ghz_ap_sp; reg_info->domain_code_6ghz_ap[WMI_REG_VERY_LOW_POWER_AP] = - ext_chan_list_event_hdr->domain_code_6ghz_ap_vlp; + ev->domain_code_6ghz_ap_vlp; + + ath11k_dbg(ab, ATH11K_DBG_WMI, + "6 GHz reg info client type %s rnr_tpe_usable %d unspecified_ap_usable %d AP sub domain: lpi %s, sp %s, vlp %s\n", + ath11k_6ghz_client_type_to_str(reg_info->client_type), + reg_info->rnr_tpe_usable, + reg_info->unspecified_ap_usable, + ath11k_sub_reg_6ghz_to_str(ev->domain_code_6ghz_ap_lpi), + ath11k_sub_reg_6ghz_to_str(ev->domain_code_6ghz_ap_sp), + ath11k_sub_reg_6ghz_to_str(ev->domain_code_6ghz_ap_vlp)); for (i = 0; i < WMI_REG_MAX_CLIENT_TYPE; i++) { reg_info->domain_code_6ghz_client[WMI_REG_INDOOR_AP][i] = - ext_chan_list_event_hdr->domain_code_6ghz_client_lpi[i]; + ev->domain_code_6ghz_client_lpi[i]; reg_info->domain_code_6ghz_client[WMI_REG_STANDARD_POWER_AP][i] = - ext_chan_list_event_hdr->domain_code_6ghz_client_sp[i]; + ev->domain_code_6ghz_client_sp[i]; reg_info->domain_code_6ghz_client[WMI_REG_VERY_LOW_POWER_AP][i] = - ext_chan_list_event_hdr->domain_code_6ghz_client_vlp[i]; + ev->domain_code_6ghz_client_vlp[i]; + + ath11k_dbg(ab, ATH11K_DBG_WMI, + "6 GHz client type %s client sub domain: lpi %s, sp %s, vlp %s\n", + ath11k_6ghz_client_type_to_str(i), + ath11k_sub_reg_6ghz_to_str(ev->domain_code_6ghz_client_lpi[i]), + ath11k_sub_reg_6ghz_to_str(ev->domain_code_6ghz_client_sp[i]), + ath11k_sub_reg_6ghz_to_str(ev->domain_code_6ghz_client_vlp[i]) + ); } - reg_info->domain_code_6ghz_super_id = - ext_chan_list_event_hdr->domain_code_6ghz_super_id; + reg_info->domain_code_6ghz_super_id = ev->domain_code_6ghz_super_id; - ath11k_dbg(ab, ATH11K_DBG_WMI, "6 GHz client_type: %d domain_code_6ghz_super_id: %d", - reg_info->client_type, reg_info->domain_code_6ghz_super_id); + ath11k_dbg(ab, ATH11K_DBG_WMI, + "6 GHz client_type %s 6 GHz super domain %s", + ath11k_6ghz_client_type_to_str(reg_info->client_type), + ath11k_super_reg_6ghz_to_str(reg_info->domain_code_6ghz_super_id)); ath11k_dbg(ab, ATH11K_DBG_WMI, "processed regulatory ext 
channel list\n"); diff --git a/drivers/net/wireless/ath/ath11k/wmi.h b/drivers/net/wireless/ath/ath11k/wmi.h index 3ec3fa0aa5c87..b23b7a22bc9af 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.h +++ b/drivers/net/wireless/ath/ath11k/wmi.h @@ -4139,6 +4139,7 @@ enum cc_setting_code { /* add new setting code above, update in * @enum wmi_reg_cc_setting_code as well. + * Also handle it in ath11k_cc_status_to_str() */ }; @@ -4163,21 +4164,162 @@ ath11k_wmi_cc_setting_code_to_reg(enum wmi_reg_cc_setting_code status_code) return REG_SET_CC_STATUS_FAIL; } +static inline const char *ath11k_cc_status_to_str(enum cc_setting_code code) +{ + switch (code) { + case REG_SET_CC_STATUS_PASS: + return "REG_SET_CC_STATUS_PASS"; + case REG_CURRENT_ALPHA2_NOT_FOUND: + return "REG_CURRENT_ALPHA2_NOT_FOUND"; + case REG_INIT_ALPHA2_NOT_FOUND: + return "REG_INIT_ALPHA2_NOT_FOUND"; + case REG_SET_CC_CHANGE_NOT_ALLOWED: + return "REG_SET_CC_CHANGE_NOT_ALLOWED"; + case REG_SET_CC_STATUS_NO_MEMORY: + return "REG_SET_CC_STATUS_NO_MEMORY"; + case REG_SET_CC_STATUS_FAIL: + return "REG_SET_CC_STATUS_FAIL"; + } + + return "Unknown CC status"; +} + enum wmi_reg_6ghz_ap_type { WMI_REG_INDOOR_AP = 0, WMI_REG_STANDARD_POWER_AP = 1, WMI_REG_VERY_LOW_POWER_AP = 2, + /* add AP type above, handle in ath11k_6ghz_ap_type_to_str() + */ WMI_REG_CURRENT_MAX_AP_TYPE, WMI_REG_MAX_AP_TYPE = 7, }; +static inline const char * +ath11k_6ghz_ap_type_to_str(enum wmi_reg_6ghz_ap_type type) +{ + switch (type) { + case WMI_REG_INDOOR_AP: + return "INDOOR AP"; + case WMI_REG_STANDARD_POWER_AP: + return "STANDARD POWER AP"; + case WMI_REG_VERY_LOW_POWER_AP: + return "VERY LOW POWER AP"; + case WMI_REG_CURRENT_MAX_AP_TYPE: + return "CURRENT_MAX_AP_TYPE"; + case WMI_REG_MAX_AP_TYPE: + return "MAX_AP_TYPE"; + } + + return "unknown 6 GHz AP type"; +} + enum wmi_reg_6ghz_client_type { WMI_REG_DEFAULT_CLIENT = 0, WMI_REG_SUBORDINATE_CLIENT = 1, WMI_REG_MAX_CLIENT_TYPE = 2, + + /* add client type above, handle it in + * ath11k_6ghz_client_type_to_str() + */ }; +static inline const char * +ath11k_6ghz_client_type_to_str(enum wmi_reg_6ghz_client_type type) +{ + switch (type) { + case WMI_REG_DEFAULT_CLIENT: + return "DEFAULT CLIENT"; + case WMI_REG_SUBORDINATE_CLIENT: + return "SUBORDINATE CLIENT"; + case WMI_REG_MAX_CLIENT_TYPE: + return "MAX_CLIENT_TYPE"; + } + + return "unknown 6 GHz client type"; +} + +enum reg_subdomains_6ghz { + EMPTY_6GHZ = 0x0, + FCC1_CLIENT_LPI_REGULAR_6GHZ = 0x01, + FCC1_CLIENT_SP_6GHZ = 0x02, + FCC1_AP_LPI_6GHZ = 0x03, + FCC1_CLIENT_LPI_SUBORDINATE = FCC1_AP_LPI_6GHZ, + FCC1_AP_SP_6GHZ = 0x04, + ETSI1_LPI_6GHZ = 0x10, + ETSI1_VLP_6GHZ = 0x11, + ETSI2_LPI_6GHZ = 0x12, + ETSI2_VLP_6GHZ = 0x13, + APL1_LPI_6GHZ = 0x20, + APL1_VLP_6GHZ = 0x21, + + /* add sub-domain above, handle it in + * ath11k_sub_reg_6ghz_to_str() + */ +}; + +static inline const char * +ath11k_sub_reg_6ghz_to_str(enum reg_subdomains_6ghz sub_id) +{ + switch (sub_id) { + case EMPTY_6GHZ: + return "N/A"; + case FCC1_CLIENT_LPI_REGULAR_6GHZ: + return "FCC1_CLIENT_LPI_REGULAR_6GHZ"; + case FCC1_CLIENT_SP_6GHZ: + return "FCC1_CLIENT_SP_6GHZ"; + case FCC1_AP_LPI_6GHZ: + return "FCC1_AP_LPI_6GHZ/FCC1_CLIENT_LPI_SUBORDINATE"; + case FCC1_AP_SP_6GHZ: + return "FCC1_AP_SP_6GHZ"; + case ETSI1_LPI_6GHZ: + return "ETSI1_LPI_6GHZ"; + case ETSI1_VLP_6GHZ: + return "ETSI1_VLP_6GHZ"; + case ETSI2_LPI_6GHZ: + return "ETSI2_LPI_6GHZ"; + case ETSI2_VLP_6GHZ: + return "ETSI2_VLP_6GHZ"; + case APL1_LPI_6GHZ: + return "APL1_LPI_6GHZ"; + case APL1_VLP_6GHZ: + return "APL1_VLP_6GHZ"; + } + 
+ return "unknown sub reg id"; +} + +enum reg_super_domain_6ghz { + FCC1_6GHZ = 0x01, + ETSI1_6GHZ = 0x02, + ETSI2_6GHZ = 0x03, + APL1_6GHZ = 0x04, + FCC1_6GHZ_CL = 0x05, + + /* add super domain above, handle it in + * ath11k_super_reg_6ghz_to_str() + */ +}; + +static inline const char * +ath11k_super_reg_6ghz_to_str(enum reg_super_domain_6ghz domain_id) +{ + switch (domain_id) { + case FCC1_6GHZ: + return "FCC1_6GHZ"; + case ETSI1_6GHZ: + return "ETSI1_6GHZ"; + case ETSI2_6GHZ: + return "ETSI2_6GHZ"; + case APL1_6GHZ: + return "APL1_6GHZ"; + case FCC1_6GHZ_CL: + return "FCC1_6GHZ_CL"; + } + + return "unknown domain id"; +} + struct cur_reg_rule { u16 start_freq; u16 end_freq; -- GitLab From f79cbc77abde22dd01ac90393a7bfb283d9d50a3 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Mon, 27 Feb 2023 14:17:31 +0200 Subject: [PATCH 0364/2078] wifi: move mac80211_hwsim and virt_wifi to virtual directory To clean up drivers/net/wireless move the virtual drivers to a new virtual directory. I did consider adding CONFIG_WLAN_VENDOR_VIRTUAL like other vendors have but then dropped the idea as we are not real drivers. There should be no changes in compilation or in Kconfig options, merely moving files. The order in menuconfig is slightly changed, the virtual drivers are now last in the list. Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230227121732.8967-2-kvalo@kernel.org --- drivers/net/wireless/Kconfig | 20 +------------------ drivers/net/wireless/Makefile | 4 +--- drivers/net/wireless/virtual/Kconfig | 20 +++++++++++++++++++ drivers/net/wireless/virtual/Makefile | 3 +++ .../wireless/{ => virtual}/mac80211_hwsim.c | 0 .../wireless/{ => virtual}/mac80211_hwsim.h | 0 .../net/wireless/{ => virtual}/virt_wifi.c | 0 7 files changed, 25 insertions(+), 22 deletions(-) create mode 100644 drivers/net/wireless/virtual/Kconfig create mode 100644 drivers/net/wireless/virtual/Makefile rename drivers/net/wireless/{ => virtual}/mac80211_hwsim.c (100%) rename drivers/net/wireless/{ => virtual}/mac80211_hwsim.h (100%) rename drivers/net/wireless/{ => virtual}/virt_wifi.c (100%) diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig index cb1c15012dd02..42b40cc96b21b 100644 --- a/drivers/net/wireless/Kconfig +++ b/drivers/net/wireless/Kconfig @@ -64,19 +64,6 @@ config PCMCIA_WL3501 It has basic support for Linux wireless extensions and initial micro support for ethtool. -config MAC80211_HWSIM - tristate "Simulated radio testing tool for mac80211" - depends on MAC80211 - help - This driver is a developer testing tool that can be used to test - IEEE 802.11 networking stack (mac80211) functionality. This is not - needed for normal wireless LAN usage and is only for testing. See - Documentation/networking/mac80211_hwsim for more information on how - to use this tool. - - To compile this driver as a module, choose M here: the module will be - called mac80211_hwsim. If unsure, say N. - config USB_NET_RNDIS_WLAN tristate "Wireless RNDIS USB support" depends on USB @@ -106,11 +93,6 @@ config USB_NET_RNDIS_WLAN If you choose to build a module, it'll be called rndis_wlan. -config VIRT_WIFI - tristate "Wifi wrapper for ethernet drivers" - depends on CFG80211 - help - This option adds support for ethernet connections to appear as if they - are wifi connections through a special rtnetlink device. 
+source "drivers/net/wireless/virtual/Kconfig" endif # WLAN diff --git a/drivers/net/wireless/Makefile b/drivers/net/wireless/Makefile index a61cf6c903431..1b697cfe0a132 100644 --- a/drivers/net/wireless/Makefile +++ b/drivers/net/wireless/Makefile @@ -29,6 +29,4 @@ obj-$(CONFIG_PCMCIA_WL3501) += wl3501_cs.o obj-$(CONFIG_USB_NET_RNDIS_WLAN) += rndis_wlan.o -obj-$(CONFIG_MAC80211_HWSIM) += mac80211_hwsim.o - -obj-$(CONFIG_VIRT_WIFI) += virt_wifi.o +obj-$(CONFIG_WLAN) += virtual/ diff --git a/drivers/net/wireless/virtual/Kconfig b/drivers/net/wireless/virtual/Kconfig new file mode 100644 index 0000000000000..fb3b4b69f26b6 --- /dev/null +++ b/drivers/net/wireless/virtual/Kconfig @@ -0,0 +1,20 @@ +config MAC80211_HWSIM + tristate "Simulated radio testing tool for mac80211" + depends on MAC80211 + help + This driver is a developer testing tool that can be used to test + IEEE 802.11 networking stack (mac80211) functionality. This is not + needed for normal wireless LAN usage and is only for testing. See + Documentation/networking/mac80211_hwsim for more information on how + to use this tool. + + To compile this driver as a module, choose M here: the module will be + called mac80211_hwsim. If unsure, say N. + +config VIRT_WIFI + tristate "Wifi wrapper for ethernet drivers" + depends on CFG80211 + help + This option adds support for ethernet connections to appear as if they + are wifi connections through a special rtnetlink device. + diff --git a/drivers/net/wireless/virtual/Makefile b/drivers/net/wireless/virtual/Makefile new file mode 100644 index 0000000000000..5773cc6d643ec --- /dev/null +++ b/drivers/net/wireless/virtual/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_MAC80211_HWSIM) += mac80211_hwsim.o + +obj-$(CONFIG_VIRT_WIFI) += virt_wifi.o diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/virtual/mac80211_hwsim.c similarity index 100% rename from drivers/net/wireless/mac80211_hwsim.c rename to drivers/net/wireless/virtual/mac80211_hwsim.c diff --git a/drivers/net/wireless/mac80211_hwsim.h b/drivers/net/wireless/virtual/mac80211_hwsim.h similarity index 100% rename from drivers/net/wireless/mac80211_hwsim.h rename to drivers/net/wireless/virtual/mac80211_hwsim.h diff --git a/drivers/net/wireless/virt_wifi.c b/drivers/net/wireless/virtual/virt_wifi.c similarity index 100% rename from drivers/net/wireless/virt_wifi.c rename to drivers/net/wireless/virtual/virt_wifi.c -- GitLab From 298e50ad8eb8fa12ea68bb2da45bb8ef4edcd0ec Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Mon, 27 Feb 2023 14:17:32 +0200 Subject: [PATCH 0365/2078] wifi: move raycs, wl3501 and rndis_wlan to legacy directory To clean up drivers/net/wireless move the old drivers drivers left in the directory to a new "legacy" directory. I did consider adding CONFIG_WLAN_VENDOR_LEGACY like other vendors have but then dropped the idea as these are really old drivers and hopefully we get to remove them soon. There should be no changes in compilation or in Kconfig options, merely moving files. 
Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230227121732.8967-3-kvalo@kernel.org --- drivers/net/wireless/Kconfig | 55 +------------------ drivers/net/wireless/Makefile | 7 +-- drivers/net/wireless/legacy/Kconfig | 55 +++++++++++++++++++ drivers/net/wireless/legacy/Makefile | 6 ++ drivers/net/wireless/{ => legacy}/ray_cs.c | 0 drivers/net/wireless/{ => legacy}/ray_cs.h | 0 drivers/net/wireless/{ => legacy}/rayctl.h | 0 .../net/wireless/{ => legacy}/rndis_wlan.c | 0 drivers/net/wireless/{ => legacy}/wl3501.h | 0 drivers/net/wireless/{ => legacy}/wl3501_cs.c | 0 10 files changed, 63 insertions(+), 60 deletions(-) create mode 100644 drivers/net/wireless/legacy/Kconfig create mode 100644 drivers/net/wireless/legacy/Makefile rename drivers/net/wireless/{ => legacy}/ray_cs.c (100%) rename drivers/net/wireless/{ => legacy}/ray_cs.h (100%) rename drivers/net/wireless/{ => legacy}/rayctl.h (100%) rename drivers/net/wireless/{ => legacy}/rndis_wlan.c (100%) rename drivers/net/wireless/{ => legacy}/wl3501.h (100%) rename drivers/net/wireless/{ => legacy}/wl3501_cs.c (100%) diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig index 42b40cc96b21b..7555af5195ec3 100644 --- a/drivers/net/wireless/Kconfig +++ b/drivers/net/wireless/Kconfig @@ -38,60 +38,7 @@ source "drivers/net/wireless/ti/Kconfig" source "drivers/net/wireless/zydas/Kconfig" source "drivers/net/wireless/quantenna/Kconfig" -config PCMCIA_RAYCS - tristate "Aviator/Raytheon 2.4GHz wireless support" - depends on PCMCIA - select WIRELESS_EXT - select WEXT_SPY - select WEXT_PRIV - help - Say Y here if you intend to attach an Aviator/Raytheon PCMCIA - (PC-card) wireless Ethernet networking card to your computer. - Please read the file - for - details. - - To compile this driver as a module, choose M here: the module will be - called ray_cs. If unsure, say N. - -config PCMCIA_WL3501 - tristate "Planet WL3501 PCMCIA cards" - depends on CFG80211 && PCMCIA - select WIRELESS_EXT - select WEXT_SPY - help - A driver for WL3501 PCMCIA 802.11 wireless cards made by Planet. - It has basic support for Linux wireless extensions and initial - micro support for ethtool. - -config USB_NET_RNDIS_WLAN - tristate "Wireless RNDIS USB support" - depends on USB - depends on CFG80211 - select USB_NET_DRIVERS - select USB_USBNET - select USB_NET_CDCETHER - select USB_NET_RNDIS_HOST - help - This is a driver for wireless RNDIS devices. - These are USB based adapters found in devices such as: - - Buffalo WLI-U2-KG125S - U.S. Robotics USR5421 - Belkin F5D7051 - Linksys WUSB54GSv2 - Linksys WUSB54GSC - Asus WL169gE - Eminent EM4045 - BT Voyager 1055 - Linksys WUSB54GSv1 - U.S. Robotics USR5420 - BUFFALO WLI-USB-G54 - - All of these devices are based on Broadcom 4320 chip which is the - only wireless RNDIS chip known to date. - - If you choose to build a module, it'll be called rndis_wlan. 
+source "drivers/net/wireless/legacy/Kconfig" source "drivers/net/wireless/virtual/Kconfig" diff --git a/drivers/net/wireless/Makefile b/drivers/net/wireless/Makefile index 1b697cfe0a132..4d7374d567d18 100644 --- a/drivers/net/wireless/Makefile +++ b/drivers/net/wireless/Makefile @@ -23,10 +23,5 @@ obj-$(CONFIG_WLAN_VENDOR_ST) += st/ obj-$(CONFIG_WLAN_VENDOR_TI) += ti/ obj-$(CONFIG_WLAN_VENDOR_ZYDAS) += zydas/ -# 16-bit wireless PCMCIA client drivers -obj-$(CONFIG_PCMCIA_RAYCS) += ray_cs.o -obj-$(CONFIG_PCMCIA_WL3501) += wl3501_cs.o - -obj-$(CONFIG_USB_NET_RNDIS_WLAN) += rndis_wlan.o - +obj-$(CONFIG_WLAN) += legacy/ obj-$(CONFIG_WLAN) += virtual/ diff --git a/drivers/net/wireless/legacy/Kconfig b/drivers/net/wireless/legacy/Kconfig new file mode 100644 index 0000000000000..3a5275941212d --- /dev/null +++ b/drivers/net/wireless/legacy/Kconfig @@ -0,0 +1,55 @@ +config PCMCIA_RAYCS + tristate "Aviator/Raytheon 2.4GHz wireless support" + depends on PCMCIA + select WIRELESS_EXT + select WEXT_SPY + select WEXT_PRIV + help + Say Y here if you intend to attach an Aviator/Raytheon PCMCIA + (PC-card) wireless Ethernet networking card to your computer. + Please read the file + for + details. + + To compile this driver as a module, choose M here: the module will be + called ray_cs. If unsure, say N. + +config PCMCIA_WL3501 + tristate "Planet WL3501 PCMCIA cards" + depends on CFG80211 && PCMCIA + select WIRELESS_EXT + select WEXT_SPY + help + A driver for WL3501 PCMCIA 802.11 wireless cards made by Planet. + It has basic support for Linux wireless extensions and initial + micro support for ethtool. + +config USB_NET_RNDIS_WLAN + tristate "Wireless RNDIS USB support" + depends on USB + depends on CFG80211 + select USB_NET_DRIVERS + select USB_USBNET + select USB_NET_CDCETHER + select USB_NET_RNDIS_HOST + help + This is a driver for wireless RNDIS devices. + These are USB based adapters found in devices such as: + + Buffalo WLI-U2-KG125S + U.S. Robotics USR5421 + Belkin F5D7051 + Linksys WUSB54GSv2 + Linksys WUSB54GSC + Asus WL169gE + Eminent EM4045 + BT Voyager 1055 + Linksys WUSB54GSv1 + U.S. Robotics USR5420 + BUFFALO WLI-USB-G54 + + All of these devices are based on Broadcom 4320 chip which is the + only wireless RNDIS chip known to date. + + If you choose to build a module, it'll be called rndis_wlan. 
+ diff --git a/drivers/net/wireless/legacy/Makefile b/drivers/net/wireless/legacy/Makefile new file mode 100644 index 0000000000000..36878f080bfc5 --- /dev/null +++ b/drivers/net/wireless/legacy/Makefile @@ -0,0 +1,6 @@ +# 16-bit wireless PCMCIA client drivers +obj-$(CONFIG_PCMCIA_RAYCS) += ray_cs.o +obj-$(CONFIG_PCMCIA_WL3501) += wl3501_cs.o + +obj-$(CONFIG_USB_NET_RNDIS_WLAN) += rndis_wlan.o + diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/legacy/ray_cs.c similarity index 100% rename from drivers/net/wireless/ray_cs.c rename to drivers/net/wireless/legacy/ray_cs.c diff --git a/drivers/net/wireless/ray_cs.h b/drivers/net/wireless/legacy/ray_cs.h similarity index 100% rename from drivers/net/wireless/ray_cs.h rename to drivers/net/wireless/legacy/ray_cs.h diff --git a/drivers/net/wireless/rayctl.h b/drivers/net/wireless/legacy/rayctl.h similarity index 100% rename from drivers/net/wireless/rayctl.h rename to drivers/net/wireless/legacy/rayctl.h diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/legacy/rndis_wlan.c similarity index 100% rename from drivers/net/wireless/rndis_wlan.c rename to drivers/net/wireless/legacy/rndis_wlan.c diff --git a/drivers/net/wireless/wl3501.h b/drivers/net/wireless/legacy/wl3501.h similarity index 100% rename from drivers/net/wireless/wl3501.h rename to drivers/net/wireless/legacy/wl3501.h diff --git a/drivers/net/wireless/wl3501_cs.c b/drivers/net/wireless/legacy/wl3501_cs.c similarity index 100% rename from drivers/net/wireless/wl3501_cs.c rename to drivers/net/wireless/legacy/wl3501_cs.c -- GitLab From 659fda7f35f0d11126edfb6c6cd9ace99a2525c0 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 3 Mar 2023 23:23:31 +0100 Subject: [PATCH 0366/2078] wifi: brcmfmac: Use ISO3166 country code and rev 0 as fallback on 4356 Many devices ship with a nvram ccode value of X2/XT/XU/XV/ALL which are all special world-wide compatibility ccode-s. Most of these world-wide ccode-s allow passive scan mode only for 2.4GHz channels 12-14, only enabling them when an AP is seen on them. Since linux-firmware has moved to the new cyfmac4356-pci.bin + cyfmac4356-pci.clm_blob firmware files this no longer works and 4356 devices using e.g. an X2 ccode fail to connect to an AP on channel 13. Add the 4356 chip-id to the list of chips for which to use the ISO3166 country code + rev 0 as fallback in brcmf_translate_country_code() to fix this. 
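The hunk that follows only adds the 4356 chip id to the fallback list; the translation itself lives in brcmf_translate_country_code(). As a rough, hypothetical sketch of the decision the commit message describes (function and variable names here are illustrative, not the brcmfmac implementation): when the chip is on the fallback list and nvram carries one of the world-wide ccodes, the driver would program the ISO3166 alpha2 with rev 0 instead.

#include <linux/kernel.h>
#include <linux/string.h>

/* Hypothetical sketch only -- not the brcmfmac code. */
static bool demo_use_iso3166_fallback(bool chip_on_fallback_list,
				      const char *nvram_ccode)
{
	static const char * const worldwide[] = { "X2", "XT", "XU", "XV", "ALL" };
	size_t i;

	if (!chip_on_fallback_list)
		return false;

	for (i = 0; i < ARRAY_SIZE(worldwide); i++)
		if (!strcmp(nvram_ccode, worldwide[i]))
			return true;

	return false;
}
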
Signed-off-by: Hans de Goede Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230303222331.285663-1-hdegoede@redhat.com --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index e0a70a671550c..548799fefb4bf 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -7881,6 +7881,7 @@ static bool brmcf_use_iso3166_ccode_fallback(struct brcmf_pub *drvr) switch (drvr->bus_if->chip) { case BRCM_CC_43430_CHIP_ID: case BRCM_CC_4345_CHIP_ID: + case BRCM_CC_4356_CHIP_ID: case BRCM_CC_43602_CHIP_ID: return true; default: -- GitLab From df259fc12b366097fc6b7c3ed1a2f9f77b4ba355 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Sun, 5 Mar 2023 18:59:32 +0100 Subject: [PATCH 0367/2078] wifi: rtl8xxxu: mark Edimax EW-7811Un V2 as tested The Edimax V2 (vid 0x7392, pid 0xb811) works well with the rtl8xxxu driver since rtl8188eu support has been added. Remove the untested flag for this device. Signed-off-by: Martin Kaiser Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230305175932.719103-1-martin@kaiser.cx --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 54ca6f2ced3f3..47dcca842d7c3 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -7012,7 +7012,7 @@ static int rtl8xxxu_probe(struct usb_interface *interface, } break; case 0x7392: - if (id->idProduct == 0x7811 || id->idProduct == 0xa611) + if (id->idProduct == 0x7811 || id->idProduct == 0xa611 || id->idProduct == 0xb811) untested = 0; break; case 0x050d: -- GitLab From 8a66293e73a520a42a7653d2ca32074ba323ff56 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Tue, 7 Mar 2023 22:18:48 +0800 Subject: [PATCH 0368/2078] wifi: rtw89: release RX standby timer of beamformee CSI to save power Originally, we keep RX standby timer to handle beamformee CSI, but this spends power and causes system not entering power save mode. To improve power consumption, release the timer if throughput becomes low. 
Signed-off-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230307141848.26403-1-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/core.h | 1 + drivers/net/wireless/realtek/rtw89/mac.c | 35 +++++++++++++++++++++-- drivers/net/wireless/realtek/rtw89/reg.h | 2 ++ 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index b1a886898c5a0..9073f526b5a3c 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -3168,6 +3168,7 @@ enum rtw89_flags { RTW89_FLAG_RUNNING, RTW89_FLAG_BFEE_MON, RTW89_FLAG_BFEE_EN, + RTW89_FLAG_BFEE_TIMER_KEEP, RTW89_FLAG_NAPI_RUNNING, RTW89_FLAG_LEISURE_PS, RTW89_FLAG_LOW_POWER_MODE, diff --git a/drivers/net/wireless/realtek/rtw89/mac.c b/drivers/net/wireless/realtek/rtw89/mac.c index 3d1e4ffef1b16..7866fe925d248 100644 --- a/drivers/net/wireless/realtek/rtw89/mac.c +++ b/drivers/net/wireless/realtek/rtw89/mac.c @@ -4931,6 +4931,24 @@ u16 rtw89_mac_get_plt_cnt(struct rtw89_dev *rtwdev, u8 band) return cnt; } +static void rtw89_mac_bfee_standby_timer(struct rtw89_dev *rtwdev, u8 mac_idx, + bool keep) +{ + u32 reg; + + rtw89_debug(rtwdev, RTW89_DBG_BF, "set bfee standby_timer to %d\n", keep); + reg = rtw89_mac_reg_by_idx(R_AX_BFMEE_RESP_OPTION, mac_idx); + if (keep) { + set_bit(RTW89_FLAG_BFEE_TIMER_KEEP, rtwdev->flags); + rtw89_write32_mask(rtwdev, reg, B_AX_BFMEE_BFRP_RX_STANDBY_TIMER_MASK, + BFRP_RX_STANDBY_TIMER_KEEP); + } else { + clear_bit(RTW89_FLAG_BFEE_TIMER_KEEP, rtwdev->flags); + rtw89_write32_mask(rtwdev, reg, B_AX_BFMEE_BFRP_RX_STANDBY_TIMER_MASK, + BFRP_RX_STANDBY_TIMER_RELEASE); + } +} + static void rtw89_mac_bfee_ctrl(struct rtw89_dev *rtwdev, u8 mac_idx, bool en) { u32 reg; @@ -4967,9 +4985,9 @@ static int rtw89_mac_init_bfee(struct rtw89_dev *rtwdev, u8 mac_idx) rtw89_write32(rtwdev, reg, CSI_RRSC_BMAP); reg = rtw89_mac_reg_by_idx(R_AX_BFMEE_RESP_OPTION, mac_idx); - val32 = FIELD_PREP(B_AX_BFMEE_BFRP_RX_STANDBY_TIMER_MASK, BFRP_RX_STANDBY_TIMER); - val32 |= FIELD_PREP(B_AX_BFMEE_NDP_RX_STANDBY_TIMER_MASK, NDP_RX_STANDBY_TIMER); + val32 = FIELD_PREP(B_AX_BFMEE_NDP_RX_STANDBY_TIMER_MASK, NDP_RX_STANDBY_TIMER); rtw89_write32(rtwdev, reg, val32); + rtw89_mac_bfee_standby_timer(rtwdev, mac_idx, true); rtw89_mac_bfee_ctrl(rtwdev, mac_idx, true); reg = rtw89_mac_reg_by_idx(R_AX_TRXPTCL_RESP_CSI_CTRL_0, mac_idx); @@ -5181,6 +5199,19 @@ void _rtw89_mac_bf_monitor_track(struct rtw89_dev *rtwdev) struct rtw89_vif *rtwvif; bool en = stats->tx_tfc_lv <= stats->rx_tfc_lv; bool old = test_bit(RTW89_FLAG_BFEE_EN, rtwdev->flags); + bool keep_timer = true; + bool old_keep_timer; + + old_keep_timer = test_bit(RTW89_FLAG_BFEE_TIMER_KEEP, rtwdev->flags); + + if (stats->tx_tfc_lv <= RTW89_TFC_LOW && stats->rx_tfc_lv <= RTW89_TFC_LOW) + keep_timer = false; + + if (keep_timer != old_keep_timer) { + rtw89_for_each_rtwvif(rtwdev, rtwvif) + rtw89_mac_bfee_standby_timer(rtwdev, rtwvif->mac_idx, + keep_timer); + } if (en == old) return; diff --git a/drivers/net/wireless/realtek/rtw89/reg.h b/drivers/net/wireless/realtek/rtw89/reg.h index 600257909df27..e5c0ab43ab7a0 100644 --- a/drivers/net/wireless/realtek/rtw89/reg.h +++ b/drivers/net/wireless/realtek/rtw89/reg.h @@ -3056,6 +3056,8 @@ #define R_AX_BFMEE_RESP_OPTION_C1 0xED80 #define B_AX_BFMEE_NDP_RX_STANDBY_TIMER_MASK GENMASK(31, 24) #define B_AX_BFMEE_BFRP_RX_STANDBY_TIMER_MASK GENMASK(23, 20) +#define BFRP_RX_STANDBY_TIMER_KEEP 0x0 +#define 
BFRP_RX_STANDBY_TIMER_RELEASE 0x1 #define B_AX_MU_BFRPTSEG_SEL_MASK GENMASK(18, 17) #define B_AX_BFMEE_NDP_RXSTDBY_SEL BIT(16) #define BFRP_RX_STANDBY_TIMER 0x0 -- GitLab From 0606b344021a37140ef467ddf30d4b8182e7bbe1 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Tue, 7 Mar 2023 20:57:17 +0100 Subject: [PATCH 0369/2078] wifi: rtl8xxxu: use module_usb_driver We can use the module_usb_driver macro instead of open-coding the driver's init and exit functions. This is simpler and saves some lines of code. Other realtek wireless drivers use module_usb_driver as well. Signed-off-by: Martin Kaiser Reviewed-by: Ping-Ke Shih Tested-by: Philipp Hortmann # Edimax N150 Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230307195718.168021-1-martin@kaiser.cx --- .../wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 20 +------------------ 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 47dcca842d7c3..280dc3e26ca72 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -7451,24 +7451,6 @@ static struct usb_driver rtl8xxxu_driver = { .disable_hub_initiated_lpm = 1, }; -static int __init rtl8xxxu_module_init(void) -{ - int res; - - res = usb_register(&rtl8xxxu_driver); - if (res < 0) - pr_err(DRIVER_NAME ": usb_register() failed (%i)\n", res); - - return res; -} - -static void __exit rtl8xxxu_module_exit(void) -{ - usb_deregister(&rtl8xxxu_driver); -} - - MODULE_DEVICE_TABLE(usb, dev_table); -module_init(rtl8xxxu_module_init); -module_exit(rtl8xxxu_module_exit); +module_usb_driver(rtl8xxxu_driver); -- GitLab From a23c82e006db2f073fb65da8762f63735a510409 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 7 Mar 2023 15:01:48 -0800 Subject: [PATCH 0370/2078] wifi: ipw2x00: convert ipw_fw_error->elem to flexible array[] The ipw_fw_error structure contains a payload[] flexible array as well as two pointers to this array area, ->elem, and ->log. The total size of the allocated structure is computed without use of the macros. There's no reason to keep both a payload[] and an extra pointer to both the elem and log members. Convert the elem pointer member into the flexible array member, removing payload. Fix the allocation of the ipw_fw_error structure to use size_add(), struct_size(), and array_size() to compute the allocation. This ensures that any overflow saturates at SIZE_MAX rather than overflowing and potentially allowing an undersized allocation. Before the structure change, the layout of ipw_fw_error was: struct ipw_fw_error { long unsigned int jiffies; /* 0 8 */ u32 status; /* 8 4 */ u32 config; /* 12 4 */ u32 elem_len; /* 16 4 */ u32 log_len; /* 20 4 */ struct ipw_error_elem * elem; /* 24 8 */ struct ipw_event * log; /* 32 8 */ u8 payload[]; /* 40 0 */ /* size: 40, cachelines: 1, members: 8 */ /* last cacheline: 40 bytes */ }; After this change, the layout is now: struct ipw_fw_error { long unsigned int jiffies; /* 0 8 */ u32 status; /* 8 4 */ u32 config; /* 12 4 */ u32 elem_len; /* 16 4 */ u32 log_len; /* 20 4 */ struct ipw_event * log; /* 24 8 */ struct ipw_error_elem elem[]; /* 32 0 */ /* size: 32, cachelines: 1, members: 7 */ /* last cacheline: 32 bytes */ }; This saves a total of 8 bytes for every ipw_fw_error allocation, and removes the risk of a potential overflow on the allocation. 
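The same pattern in isolation: one flexible array plus a second, separately counted region appended after it, sized with the saturating helpers from <linux/overflow.h>. The structure below is a generic stand-in, not the ipw2200 definition:

#include <linux/overflow.h>
#include <linux/slab.h>

struct elem { u32 a; u32 b; };
struct event { u32 time; u32 data; };

struct record {
	u32 elem_len;
	u32 log_len;
	struct event *log;	/* points just past elem[] */
	struct elem elem[];	/* flexible array member */
};

static struct record *record_alloc(u32 elem_len, u32 log_len, gfp_t gfp)
{
	struct record *rec;

	/* struct_size() and array_size() saturate at SIZE_MAX on overflow,
	 * so a huge count cannot wrap into an undersized allocation. */
	rec = kmalloc(size_add(struct_size(rec, elem, elem_len),
			       array_size(sizeof(*rec->log), log_len)),
		      gfp);
	if (!rec)
		return NULL;

	rec->elem_len = elem_len;
	rec->log_len = log_len;
	rec->log = (struct event *)(rec->elem + elem_len);
	return rec;
}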
Signed-off-by: Jacob Keller Cc: Stanislav Yakovlev Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230307230148.3735684-1-jacob.e.keller@intel.com --- drivers/net/wireless/intel/ipw2x00/ipw2200.c | 7 +++---- drivers/net/wireless/intel/ipw2x00/ipw2200.h | 3 +-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2200.c b/drivers/net/wireless/intel/ipw2x00/ipw2200.c index d382f20173256..b91b1a2d0be7a 100644 --- a/drivers/net/wireless/intel/ipw2x00/ipw2200.c +++ b/drivers/net/wireless/intel/ipw2x00/ipw2200.c @@ -1234,9 +1234,9 @@ static struct ipw_fw_error *ipw_alloc_error_log(struct ipw_priv *priv) u32 base = ipw_read32(priv, IPW_ERROR_LOG); u32 elem_len = ipw_read_reg32(priv, base); - error = kmalloc(sizeof(*error) + - sizeof(*error->elem) * elem_len + - sizeof(*error->log) * log_len, GFP_ATOMIC); + error = kmalloc(size_add(struct_size(error, elem, elem_len), + array_size(sizeof(*error->log), log_len)), + GFP_ATOMIC); if (!error) { IPW_ERROR("Memory allocation for firmware error log " "failed.\n"); @@ -1247,7 +1247,6 @@ static struct ipw_fw_error *ipw_alloc_error_log(struct ipw_priv *priv) error->config = priv->config; error->elem_len = elem_len; error->log_len = log_len; - error->elem = (struct ipw_error_elem *)error->payload; error->log = (struct ipw_event *)(error->elem + elem_len); ipw_capture_event_log(priv, log_len, error->log); diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2200.h b/drivers/net/wireless/intel/ipw2x00/ipw2200.h index 09ddd21608d44..8ebf09121e173 100644 --- a/drivers/net/wireless/intel/ipw2x00/ipw2200.h +++ b/drivers/net/wireless/intel/ipw2x00/ipw2200.h @@ -1106,9 +1106,8 @@ struct ipw_fw_error { /* XXX */ u32 config; u32 elem_len; u32 log_len; - struct ipw_error_elem *elem; struct ipw_event *log; - u8 payload[]; + struct ipw_error_elem elem[]; } __packed; #ifdef CONFIG_IPW2200_PROMISCUOUS -- GitLab From 84e9e2102bdc4f60ce131dcb74046219dc4425ae Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 7 Mar 2023 15:02:12 -0800 Subject: [PATCH 0371/2078] wifi: qtnfmac: use struct_size and size_sub for payload length Replace the calculations for the payload length in qtnf_cmd_band_fill_iftype with struct_size() and size_sub(). While the payload length does not get directly passed to an allocation function, the performed calculation is still calculating the size of a flexible array structure (minus the size of a header structure). 
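Reduced to its essentials, the computation is struct_size() of the whole TLV minus the generic header, with size_sub() keeping the result saturated if the first term already overflowed. The layout below is a stand-in for qlink_tlv_iftype_data, not the real definition:

#include <linux/overflow.h>
#include <linux/types.h>

struct tlv_hdr {
	__le16 type;
	__le16 len;
} __packed;

struct iftype_entry {
	u8 data[8];
} __packed;

struct iftype_tlv {
	struct tlv_hdr hdr;
	u8 n_iftype_data;
	u8 rsvd[3];
	struct iftype_entry iftype_data[];
} __packed;

/* hdr.len is expected to cover everything after the generic TLV header. */
static size_t iftype_tlv_payload_len(const struct iftype_tlv *tlv)
{
	return size_sub(struct_size(tlv, iftype_data, tlv->n_iftype_data),
			sizeof(struct tlv_hdr));
}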
Signed-off-by: Jacob Keller Cc: Igor Mitsyanko Cc: Sergey Matyukevich Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230307230212.3735818-1-jacob.e.keller@intel.com --- drivers/net/wireless/quantenna/qtnfmac/commands.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index b1b73478d89b5..68ae9c7ea95ab 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -1325,9 +1325,10 @@ static int qtnf_cmd_band_fill_iftype(const u8 *data, struct ieee80211_sband_iftype_data *iftype_data; const struct qlink_tlv_iftype_data *tlv = (const struct qlink_tlv_iftype_data *)data; - size_t payload_len = tlv->n_iftype_data * sizeof(*tlv->iftype_data) + - sizeof(*tlv) - - sizeof(struct qlink_tlv_hdr); + size_t payload_len; + + payload_len = struct_size(tlv, iftype_data, tlv->n_iftype_data); + payload_len = size_sub(payload_len, sizeof(struct qlink_tlv_hdr)); if (tlv->hdr.len != cpu_to_le16(payload_len)) { pr_err("bad IFTYPE_DATA TLV len %u\n", tlv->hdr.len); -- GitLab From e49bdd85c92dacb12151aa1b9cf48b81c81a6f98 Mon Sep 17 00:00:00 2001 From: Ching-Te Ku Date: Wed, 8 Mar 2023 13:32:19 +0800 Subject: [PATCH 0372/2078] wifi: rtw89: coex: Add more error_map and counter to log The error map and counter can help to analyze is coexistence mechanism going well or not. For example, if there is E2G (External control Wi-Fi slot for Wi-Fi 2.4 GHz) hang counter, it means Wi-Fi firmware didn't cut a slot for Wi-Fi 2.4 GHz. Maybe something wrong with firmware timer. Signed-off-by: Ching-Te Ku Signed-off-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230308053225.24377-2-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/coex.c | 112 +++++++++++++--------- drivers/net/wireless/realtek/rtw89/core.h | 31 ++++-- 2 files changed, 88 insertions(+), 55 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/coex.c b/drivers/net/wireless/realtek/rtw89/coex.c index bcf483cafd203..f80952e6e11ba 100644 --- a/drivers/net/wireless/realtek/rtw89/coex.c +++ b/drivers/net/wireless/realtek/rtw89/coex.c @@ -734,6 +734,7 @@ static void _reset_btc_var(struct rtw89_dev *rtwdev, u8 type) #define BTC_RPT_HDR_SIZE 3 #define BTC_CHK_WLSLOT_DRIFT_MAX 15 +#define BTC_CHK_BTSLOT_DRIFT_MAX 15 #define BTC_CHK_HANG_MAX 3 static void _chk_btc_err(struct rtw89_dev *rtwdev, u8 type, u32 cnt) @@ -748,62 +749,76 @@ static void _chk_btc_err(struct rtw89_dev *rtwdev, u8 type, u32 cnt) __func__, type, cnt); switch (type) { - case BTC_DCNT_RPT_FREEZE: + case BTC_DCNT_RPT_HANG: if (dm->cnt_dm[BTC_DCNT_RPT] == cnt && btc->fwinfo.rpt_en_map) - dm->cnt_dm[BTC_DCNT_RPT_FREEZE]++; + dm->cnt_dm[BTC_DCNT_RPT_HANG]++; else - dm->cnt_dm[BTC_DCNT_RPT_FREEZE] = 0; + dm->cnt_dm[BTC_DCNT_RPT_HANG] = 0; - if (dm->cnt_dm[BTC_DCNT_RPT_FREEZE] >= BTC_CHK_HANG_MAX) + if (dm->cnt_dm[BTC_DCNT_RPT_HANG] >= BTC_CHK_HANG_MAX) dm->error.map.wl_fw_hang = true; else dm->error.map.wl_fw_hang = false; dm->cnt_dm[BTC_DCNT_RPT] = cnt; break; - case BTC_DCNT_CYCLE_FREEZE: + case BTC_DCNT_CYCLE_HANG: if (dm->cnt_dm[BTC_DCNT_CYCLE] == cnt && (dm->tdma_now.type != CXTDMA_OFF || dm->tdma_now.ext_ctrl == CXECTL_EXT)) - dm->cnt_dm[BTC_DCNT_CYCLE_FREEZE]++; + dm->cnt_dm[BTC_DCNT_CYCLE_HANG]++; else - dm->cnt_dm[BTC_DCNT_CYCLE_FREEZE] = 0; + dm->cnt_dm[BTC_DCNT_CYCLE_HANG] = 0; - if (dm->cnt_dm[BTC_DCNT_CYCLE_FREEZE] >= BTC_CHK_HANG_MAX) + if 
(dm->cnt_dm[BTC_DCNT_CYCLE_HANG] >= BTC_CHK_HANG_MAX) dm->error.map.cycle_hang = true; else dm->error.map.cycle_hang = false; dm->cnt_dm[BTC_DCNT_CYCLE] = cnt; break; - case BTC_DCNT_W1_FREEZE: + case BTC_DCNT_W1_HANG: if (dm->cnt_dm[BTC_DCNT_W1] == cnt && dm->tdma_now.type != CXTDMA_OFF) - dm->cnt_dm[BTC_DCNT_W1_FREEZE]++; + dm->cnt_dm[BTC_DCNT_W1_HANG]++; else - dm->cnt_dm[BTC_DCNT_W1_FREEZE] = 0; + dm->cnt_dm[BTC_DCNT_W1_HANG] = 0; - if (dm->cnt_dm[BTC_DCNT_W1_FREEZE] >= BTC_CHK_HANG_MAX) + if (dm->cnt_dm[BTC_DCNT_W1_HANG] >= BTC_CHK_HANG_MAX) dm->error.map.w1_hang = true; else dm->error.map.w1_hang = false; dm->cnt_dm[BTC_DCNT_W1] = cnt; break; - case BTC_DCNT_B1_FREEZE: + case BTC_DCNT_B1_HANG: if (dm->cnt_dm[BTC_DCNT_B1] == cnt && dm->tdma_now.type != CXTDMA_OFF) - dm->cnt_dm[BTC_DCNT_B1_FREEZE]++; + dm->cnt_dm[BTC_DCNT_B1_HANG]++; else - dm->cnt_dm[BTC_DCNT_B1_FREEZE] = 0; + dm->cnt_dm[BTC_DCNT_B1_HANG] = 0; - if (dm->cnt_dm[BTC_DCNT_B1_FREEZE] >= BTC_CHK_HANG_MAX) + if (dm->cnt_dm[BTC_DCNT_B1_HANG] >= BTC_CHK_HANG_MAX) dm->error.map.b1_hang = true; else dm->error.map.b1_hang = false; dm->cnt_dm[BTC_DCNT_B1] = cnt; break; + case BTC_DCNT_E2G_HANG: + if (dm->cnt_dm[BTC_DCNT_E2G] == cnt && + dm->tdma_now.ext_ctrl == CXECTL_EXT) + dm->cnt_dm[BTC_DCNT_E2G_HANG]++; + else + dm->cnt_dm[BTC_DCNT_E2G_HANG] = 0; + + if (dm->cnt_dm[BTC_DCNT_E2G_HANG] >= BTC_CHK_HANG_MAX) + dm->error.map.wl_e2g_hang = true; + else + dm->error.map.wl_e2g_hang = false; + + dm->cnt_dm[BTC_DCNT_E2G] = cnt; + break; case BTC_DCNT_TDMA_NONSYNC: if (cnt != 0) /* if tdma not sync between drv/fw */ dm->cnt_dm[BTC_DCNT_TDMA_NONSYNC]++; @@ -822,23 +837,23 @@ static void _chk_btc_err(struct rtw89_dev *rtwdev, u8 type, u32 cnt) dm->cnt_dm[BTC_DCNT_SLOT_NONSYNC] = 0; if (dm->cnt_dm[BTC_DCNT_SLOT_NONSYNC] >= BTC_CHK_HANG_MAX) - dm->error.map.tdma_no_sync = true; + dm->error.map.slot_no_sync = true; else - dm->error.map.tdma_no_sync = false; + dm->error.map.slot_no_sync = false; break; - case BTC_DCNT_BTCNT_FREEZE: + case BTC_DCNT_BTCNT_HANG: cnt = cx->cnt_bt[BTC_BCNT_HIPRI_RX] + cx->cnt_bt[BTC_BCNT_HIPRI_TX] + cx->cnt_bt[BTC_BCNT_LOPRI_RX] + cx->cnt_bt[BTC_BCNT_LOPRI_TX]; if (cnt == 0) - dm->cnt_dm[BTC_DCNT_BTCNT_FREEZE]++; + dm->cnt_dm[BTC_DCNT_BTCNT_HANG]++; else - dm->cnt_dm[BTC_DCNT_BTCNT_FREEZE] = 0; + dm->cnt_dm[BTC_DCNT_BTCNT_HANG] = 0; - if ((dm->cnt_dm[BTC_DCNT_BTCNT_FREEZE] >= BTC_CHK_HANG_MAX && - bt->enable.now) || (!dm->cnt_dm[BTC_DCNT_BTCNT_FREEZE] && + if ((dm->cnt_dm[BTC_DCNT_BTCNT_HANG] >= BTC_CHK_HANG_MAX && + bt->enable.now) || (!dm->cnt_dm[BTC_DCNT_BTCNT_HANG] && !bt->enable.now)) _update_bt_scbd(rtwdev, false); break; @@ -853,6 +868,18 @@ static void _chk_btc_err(struct rtw89_dev *rtwdev, u8 type, u32 cnt) else dm->error.map.wl_slot_drift = false; break; + case BTC_DCNT_BT_SLOT_DRIFT: + if (cnt >= BTC_CHK_BTSLOT_DRIFT_MAX) + dm->cnt_dm[BTC_DCNT_BT_SLOT_DRIFT]++; + else + dm->cnt_dm[BTC_DCNT_BT_SLOT_DRIFT] = 0; + + if (dm->cnt_dm[BTC_DCNT_BT_SLOT_DRIFT] >= BTC_CHK_HANG_MAX) + dm->error.map.bt_slot_drift = true; + else + dm->error.map.bt_slot_drift = false; + + break; } } @@ -1129,14 +1156,14 @@ static u32 _chk_btc_report(struct rtw89_dev *rtwdev, wl->ver_info.fw = prpt->v1.wl_fw_ver; dm->wl_fw_cx_offload = !!prpt->v1.wl_fw_cx_offload; - _chk_btc_err(rtwdev, BTC_DCNT_RPT_FREEZE, + _chk_btc_err(rtwdev, BTC_DCNT_RPT_HANG, pfwinfo->event[BTF_EVNT_RPT]); /* To avoid I/O if WL LPS or power-off */ if (wl->status.map.lps != BTC_LPS_RF_OFF && !wl->status.map.rf_off) { rtwdev->chip->ops->btc_update_bt_cnt(rtwdev); 
- _chk_btc_err(rtwdev, BTC_DCNT_BTCNT_FREEZE, 0); + _chk_btc_err(rtwdev, BTC_DCNT_BTCNT_HANG, 0); btc->cx.cnt_bt[BTC_BCNT_POLUT] = rtw89_mac_get_plt_cnt(rtwdev, @@ -1164,8 +1191,8 @@ static u32 _chk_btc_report(struct rtw89_dev *rtwdev, btc->cx.cnt_bt[BTC_BCNT_POLUT] = le32_to_cpu(prpt->v4.bt_cnt[BTC_BCNT_POLLUTED]); - _chk_btc_err(rtwdev, BTC_DCNT_BTCNT_FREEZE, 0); - _chk_btc_err(rtwdev, BTC_DCNT_RPT_FREEZE, + _chk_btc_err(rtwdev, BTC_DCNT_BTCNT_HANG, 0); + _chk_btc_err(rtwdev, BTC_DCNT_RPT_HANG, pfwinfo->event[BTF_EVNT_RPT]); if (le32_to_cpu(prpt->v4.bt_cnt[BTC_BCNT_RFK_TIMEOUT]) > 0) @@ -1196,8 +1223,8 @@ static u32 _chk_btc_report(struct rtw89_dev *rtwdev, btc->cx.cnt_bt[BTC_BCNT_POLUT] = le16_to_cpu(prpt->v5.bt_cnt[BTC_BCNT_POLLUTED]); - _chk_btc_err(rtwdev, BTC_DCNT_BTCNT_FREEZE, 0); - _chk_btc_err(rtwdev, BTC_DCNT_RPT_FREEZE, + _chk_btc_err(rtwdev, BTC_DCNT_BTCNT_HANG, 0); + _chk_btc_err(rtwdev, BTC_DCNT_RPT_HANG, pfwinfo->event[BTF_EVNT_RPT]); dm->error.map.bt_rfk_timeout = bt->rfk_info.map.timeout; @@ -1258,11 +1285,11 @@ static u32 _chk_btc_report(struct rtw89_dev *rtwdev, BTC_DCNT_WL_SLOT_DRIFT, diff_t); } - _chk_btc_err(rtwdev, BTC_DCNT_W1_FREEZE, + _chk_btc_err(rtwdev, BTC_DCNT_W1_HANG, le32_to_cpu(pcysta->v2.slot_cnt[CXST_W1])); - _chk_btc_err(rtwdev, BTC_DCNT_W1_FREEZE, + _chk_btc_err(rtwdev, BTC_DCNT_W1_HANG, le32_to_cpu(pcysta->v2.slot_cnt[CXST_B1])); - _chk_btc_err(rtwdev, BTC_DCNT_CYCLE_FREEZE, + _chk_btc_err(rtwdev, BTC_DCNT_CYCLE_HANG, le16_to_cpu(pcysta->v2.cycles)); } else if (ver->fcxcysta == 3) { if (le16_to_cpu(pcysta->v3.cycles) < BTC_CYSTA_CHK_PERIOD) @@ -1299,11 +1326,11 @@ static u32 _chk_btc_report(struct rtw89_dev *rtwdev, } } - _chk_btc_err(rtwdev, BTC_DCNT_W1_FREEZE, + _chk_btc_err(rtwdev, BTC_DCNT_W1_HANG, le32_to_cpu(pcysta->v3.slot_cnt[CXST_W1])); - _chk_btc_err(rtwdev, BTC_DCNT_B1_FREEZE, + _chk_btc_err(rtwdev, BTC_DCNT_B1_HANG, le32_to_cpu(pcysta->v3.slot_cnt[CXST_B1])); - _chk_btc_err(rtwdev, BTC_DCNT_CYCLE_FREEZE, + _chk_btc_err(rtwdev, BTC_DCNT_CYCLE_HANG, le16_to_cpu(pcysta->v3.cycles)); } else if (ver->fcxcysta == 4) { if (le16_to_cpu(pcysta->v4.cycles) < BTC_CYSTA_CHK_PERIOD) @@ -1341,11 +1368,11 @@ static u32 _chk_btc_report(struct rtw89_dev *rtwdev, } } - _chk_btc_err(rtwdev, BTC_DCNT_W1_FREEZE, + _chk_btc_err(rtwdev, BTC_DCNT_W1_HANG, le16_to_cpu(pcysta->v4.slot_cnt[CXST_W1])); - _chk_btc_err(rtwdev, BTC_DCNT_B1_FREEZE, + _chk_btc_err(rtwdev, BTC_DCNT_B1_HANG, le16_to_cpu(pcysta->v4.slot_cnt[CXST_B1])); - _chk_btc_err(rtwdev, BTC_DCNT_CYCLE_FREEZE, + _chk_btc_err(rtwdev, BTC_DCNT_CYCLE_HANG, le16_to_cpu(pcysta->v4.cycles)); } else { goto err; @@ -4578,7 +4605,7 @@ static void _update_bt_scbd(struct rtw89_dev *rtwdev, bool only_update) } if (!(val & BTC_BSCB_ON) || - btc->dm.cnt_dm[BTC_DCNT_BTCNT_FREEZE] >= BTC_CHK_HANG_MAX) + btc->dm.cnt_dm[BTC_DCNT_BTCNT_HANG] >= BTC_CHK_HANG_MAX) bt->enable.now = 0; else bt->enable.now = 1; @@ -5349,7 +5376,7 @@ void rtw89_btc_ntfy_radio_state(struct rtw89_dev *rtwdev, enum btc_rfctrl rf_sta _write_scbd(rtwdev, BTC_WSCB_ALL, false); } - btc->dm.cnt_dm[BTC_DCNT_BTCNT_FREEZE] = 0; + btc->dm.cnt_dm[BTC_DCNT_BTCNT_HANG] = 0; if (wl->status.map.lps_pre == BTC_LPS_OFF && wl->status.map.lps_pre != wl->status.map.lps) btc->dm.tdma_instant_excute = 1; @@ -5700,11 +5727,6 @@ static void _show_cx_info(struct rtw89_dev *rtwdev, struct seq_file *m) seq_printf(m, " %-15s : Coex:%d.%d.%d(branch:%d), ", "[coex_version]", ver_main, ver_sub, ver_hotfix, id_branch); - if (dm->wl_fw_cx_offload != BTC_CX_FW_OFFLOAD) - 
dm->error.map.offload_mismatch = true; - else - dm->error.map.offload_mismatch = false; - ver_main = FIELD_GET(GENMASK(31, 24), wl->ver_info.fw_coex); ver_sub = FIELD_GET(GENMASK(23, 16), wl->ver_info.fw_coex); ver_hotfix = FIELD_GET(GENMASK(15, 8), wl->ver_info.fw_coex); diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index 9073f526b5a3c..f998f8a3f2774 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -883,20 +883,24 @@ enum rtw89_btc_dcnt { BTC_DCNT_RUN = 0x0, BTC_DCNT_CX_RUNINFO, BTC_DCNT_RPT, - BTC_DCNT_RPT_FREEZE, + BTC_DCNT_RPT_HANG, BTC_DCNT_CYCLE, - BTC_DCNT_CYCLE_FREEZE, + BTC_DCNT_CYCLE_HANG, BTC_DCNT_W1, - BTC_DCNT_W1_FREEZE, + BTC_DCNT_W1_HANG, BTC_DCNT_B1, - BTC_DCNT_B1_FREEZE, + BTC_DCNT_B1_HANG, BTC_DCNT_TDMA_NONSYNC, BTC_DCNT_SLOT_NONSYNC, - BTC_DCNT_BTCNT_FREEZE, + BTC_DCNT_BTCNT_HANG, BTC_DCNT_WL_SLOT_DRIFT, - BTC_DCNT_BT_SLOT_DRIFT, BTC_DCNT_WL_STA_LAST, - BTC_DCNT_NUM, + BTC_DCNT_BT_SLOT_DRIFT, + BTC_DCNT_BT_SLOT_FLOOD, + BTC_DCNT_FDDT_TRIG, + BTC_DCNT_E2G, + BTC_DCNT_E2G_HANG, + BTC_DCNT_NUM }; enum rtw89_btc_wl_state_cnt { @@ -1302,15 +1306,22 @@ struct rtw89_btc_dm_emap { u32 pta_owner: 1; u32 wl_rfk_timeout: 1; u32 bt_rfk_timeout: 1; - u32 wl_fw_hang: 1; - u32 offload_mismatch: 1; u32 cycle_hang: 1; u32 w1_hang: 1; - u32 b1_hang: 1; u32 tdma_no_sync: 1; + u32 slot_no_sync: 1; u32 wl_slot_drift: 1; + u32 bt_slot_drift: 1; + u32 role_num_mismatch: 1; + u32 null1_tx_late: 1; + u32 bt_afh_conflict: 1; + u32 bt_leafh_conflict: 1; + u32 bt_slot_flood: 1; + u32 wl_e2g_hang: 1; + u32 wl_ver_mismatch: 1; + u32 bt_ver_mismatch: 1; }; union rtw89_btc_dm_error_map { -- GitLab From 5049964c4af86865153c553fc6a138df02685ffb Mon Sep 17 00:00:00 2001 From: Ching-Te Ku Date: Wed, 8 Mar 2023 13:32:20 +0800 Subject: [PATCH 0373/2078] wifi: rtw89: coex: Add WiFi role info v2 Remove WiFi traffic busy level & traffic rate from active role information. This information will move to v5 version TDMA cycle info. 
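Consumers of the role info now dispatch on the firmware's fwlrole version and bail out on layouts they do not know, as the diff below does with if/else chains. Condensed into a sketch with placeholder types, not the rtw89 structures:

#include <linux/errno.h>
#include <linux/types.h>

struct role_info    { u8 link_mode; };
struct role_info_v1 { u8 link_mode; };
struct role_info_v2 { u8 link_mode; };

struct wl_info {
	struct role_info    role_info;
	struct role_info_v1 role_info_v1;
	struct role_info_v2 role_info_v2;
};

/* Pick the link mode from whichever layout the firmware advertises;
 * an unknown version makes the caller return early instead of guessing. */
static int wl_link_mode(u8 fwlrole, const struct wl_info *wl, u8 *mode)
{
	switch (fwlrole) {
	case 0: *mode = wl->role_info.link_mode;    return 0;
	case 1: *mode = wl->role_info_v1.link_mode; return 0;
	case 2: *mode = wl->role_info_v2.link_mode; return 0;
	default: return -EINVAL;
	}
}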
Signed-off-by: Ching-Te Ku Signed-off-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230308053225.24377-3-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/coex.c | 277 +++++++++++++++++++++- drivers/net/wireless/realtek/rtw89/core.h | 32 +++ drivers/net/wireless/realtek/rtw89/fw.c | 86 +++++++ drivers/net/wireless/realtek/rtw89/fw.h | 51 ++++ 4 files changed, 439 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/coex.c b/drivers/net/wireless/realtek/rtw89/coex.c index f80952e6e11ba..b56897529504a 100644 --- a/drivers/net/wireless/realtek/rtw89/coex.c +++ b/drivers/net/wireless/realtek/rtw89/coex.c @@ -1819,6 +1819,8 @@ static void _fw_set_drv_info(struct rtw89_dev *rtwdev, u8 type) rtw89_fw_h2c_cxdrv_role(rtwdev); else if (ver->fwlrole == 1) rtw89_fw_h2c_cxdrv_role_v1(rtwdev); + else if (ver->fwlrole == 2) + rtw89_fw_h2c_cxdrv_role_v2(rtwdev); break; case CXDRVINFO_CTRL: rtw89_fw_h2c_cxdrv_ctrl(rtwdev); @@ -2113,8 +2115,10 @@ static void _set_bt_afh_info(struct rtw89_dev *rtwdev) struct rtw89_btc_bt_link_info *b = &bt->link_info; struct rtw89_btc_wl_role_info *wl_rinfo = &wl->role_info; struct rtw89_btc_wl_role_info_v1 *wl_rinfo_v1 = &wl->role_info_v1; + struct rtw89_btc_wl_role_info_v2 *wl_rinfo_v2 = &wl->role_info_v2; struct rtw89_btc_wl_active_role *r; struct rtw89_btc_wl_active_role_v1 *r1; + struct rtw89_btc_wl_active_role_v2 *r2; u8 en = 0, i, ch = 0, bw = 0; u8 mode, connect_cnt; @@ -2124,9 +2128,14 @@ static void _set_bt_afh_info(struct rtw89_dev *rtwdev) if (ver->fwlrole == 0) { mode = wl_rinfo->link_mode; connect_cnt = wl_rinfo->connect_cnt; - } else { + } else if (ver->fwlrole == 1) { mode = wl_rinfo_v1->link_mode; connect_cnt = wl_rinfo_v1->connect_cnt; + } else if (ver->fwlrole == 2) { + mode = wl_rinfo_v2->link_mode; + connect_cnt = wl_rinfo_v2->connect_cnt; + } else { + return; } if (wl->status.map.rf_off || bt->whql_test || @@ -2139,6 +2148,7 @@ static void _set_bt_afh_info(struct rtw89_dev *rtwdev) for (i = 0; i < RTW89_PORT_NUM; i++) { r = &wl_rinfo->active_role[i]; r1 = &wl_rinfo_v1->active_role_v1[i]; + r2 = &wl_rinfo_v2->active_role_v2[i]; if (ver->fwlrole == 0 && (r->role == RTW89_WIFI_ROLE_P2P_GO || @@ -2152,6 +2162,12 @@ static void _set_bt_afh_info(struct rtw89_dev *rtwdev) ch = r1->ch; bw = r1->bw; break; + } else if (ver->fwlrole == 2 && + (r2->role == RTW89_WIFI_ROLE_P2P_GO || + r2->role == RTW89_WIFI_ROLE_P2P_CLIENT)) { + ch = r2->ch; + bw = r2->bw; + break; } } } else { @@ -2160,6 +2176,7 @@ static void _set_bt_afh_info(struct rtw89_dev *rtwdev) for (i = 0; i < RTW89_PORT_NUM; i++) { r = &wl_rinfo->active_role[i]; r1 = &wl_rinfo_v1->active_role_v1[i]; + r2 = &wl_rinfo_v2->active_role_v2[i]; if (ver->fwlrole == 0 && r->connected && r->band == RTW89_BAND_2G) { @@ -2171,6 +2188,11 @@ static void _set_bt_afh_info(struct rtw89_dev *rtwdev) ch = r1->ch; bw = r1->bw; break; + } else if (ver->fwlrole == 2 && + r2->connected && r2->band == RTW89_BAND_2G) { + ch = r2->ch; + bw = r2->bw; + break; } } } @@ -3625,6 +3647,7 @@ static void _set_btg_ctrl(struct rtw89_dev *rtwdev) struct rtw89_btc_wl_info *wl = &btc->cx.wl; struct rtw89_btc_wl_role_info *wl_rinfo = &wl->role_info; struct rtw89_btc_wl_role_info_v1 *wl_rinfo_v1 = &wl->role_info_v1; + struct rtw89_btc_wl_role_info_v2 *wl_rinfo_v2 = &wl->role_info_v2; struct rtw89_btc_wl_dbcc_info *wl_dinfo = &wl->dbcc_info; bool is_btg; u8 mode; @@ -3634,8 +3657,12 @@ static void _set_btg_ctrl(struct rtw89_dev *rtwdev) if (ver->fwlrole == 0) mode = 
wl_rinfo->link_mode; - else + else if (ver->fwlrole == 1) mode = wl_rinfo_v1->link_mode; + else if (ver->fwlrole == 2) + mode = wl_rinfo_v2->link_mode; + else + return; /* notify halbb ignore GNT_BT or not for WL BB Rx-AGC control */ if (mode == BTC_WLINK_5G) /* always 0 if 5G */ @@ -3736,6 +3763,7 @@ static void _set_wl_tx_limit(struct rtw89_dev *rtwdev) struct rtw89_btc_bt_hid_desc *hid = &b->hid_desc; struct rtw89_btc_wl_role_info *wl_rinfo = &wl->role_info; struct rtw89_btc_wl_role_info_v1 *wl_rinfo_v1 = &wl->role_info_v1; + struct rtw89_btc_wl_role_info_v2 *wl_rinfo_v2 = &wl->role_info_v2; struct rtw89_txtime_data data = {.rtwdev = rtwdev}; u8 mode; u8 tx_retry; @@ -3748,8 +3776,12 @@ static void _set_wl_tx_limit(struct rtw89_dev *rtwdev) if (ver->fwlrole == 0) mode = wl_rinfo->link_mode; - else + else if (ver->fwlrole == 1) mode = wl_rinfo_v1->link_mode; + else if (ver->fwlrole == 2) + mode = wl_rinfo_v2->link_mode; + else + return; if (btc->dm.freerun || btc->ctrl.igno_bt || b->profile_cnt.now == 0 || mode == BTC_WLINK_5G || mode == BTC_WLINK_NOLINK) { @@ -3799,14 +3831,19 @@ static void _set_bt_rx_agc(struct rtw89_dev *rtwdev) struct rtw89_btc_wl_info *wl = &btc->cx.wl; struct rtw89_btc_wl_role_info *wl_rinfo = &wl->role_info; struct rtw89_btc_wl_role_info_v1 *wl_rinfo_v1 = &wl->role_info_v1; + struct rtw89_btc_wl_role_info_v2 *wl_rinfo_v2 = &wl->role_info_v2; struct rtw89_btc_bt_info *bt = &btc->cx.bt; bool bt_hi_lna_rx = false; u8 mode; if (ver->fwlrole == 0) mode = wl_rinfo->link_mode; - else + else if (ver->fwlrole == 1) mode = wl_rinfo_v1->link_mode; + else if (ver->fwlrole == 2) + mode = wl_rinfo_v2->link_mode; + else + return; if (mode != BTC_WLINK_NOLINK && btc->dm.wl_btg_rx) bt_hi_lna_rx = true; @@ -4079,6 +4116,68 @@ static void _action_wl_2g_scc_v1(struct rtw89_dev *rtwdev) _set_policy(rtwdev, policy_type, BTC_ACT_WL_2G_SCC); } +static void _action_wl_2g_scc_v2(struct rtw89_dev *rtwdev) +{ + struct rtw89_btc *btc = &rtwdev->btc; + struct rtw89_btc_wl_info *wl = &btc->cx.wl; + struct rtw89_btc_bt_info *bt = &btc->cx.bt; + struct rtw89_btc_dm *dm = &btc->dm; + struct rtw89_btc_wl_role_info_v2 *wl_rinfo = &wl->role_info_v2; + u16 policy_type = BTC_CXP_OFF_BT; + u32 dur; + + if (btc->mdinfo.ant.type == BTC_ANT_DEDICATED) { + policy_type = BTC_CXP_OFF_EQ0; + } else { + /* shared-antenna */ + switch (wl_rinfo->mrole_type) { + case BTC_WLMROLE_STA_GC: + dm->wl_scc.null_role1 = RTW89_WIFI_ROLE_STATION; + dm->wl_scc.null_role2 = RTW89_WIFI_ROLE_P2P_CLIENT; + dm->wl_scc.ebt_null = 0; /* no ext-slot-control */ + _action_by_bt(rtwdev); + return; + case BTC_WLMROLE_STA_STA: + dm->wl_scc.null_role1 = RTW89_WIFI_ROLE_STATION; + dm->wl_scc.null_role2 = RTW89_WIFI_ROLE_STATION; + dm->wl_scc.ebt_null = 0; /* no ext-slot-control */ + _action_by_bt(rtwdev); + return; + case BTC_WLMROLE_STA_GC_NOA: + case BTC_WLMROLE_STA_GO: + case BTC_WLMROLE_STA_GO_NOA: + dm->wl_scc.null_role1 = RTW89_WIFI_ROLE_STATION; + dm->wl_scc.null_role2 = RTW89_WIFI_ROLE_NONE; + dur = wl_rinfo->mrole_noa_duration; + + if (wl->status.map._4way) { + dm->wl_scc.ebt_null = 0; + policy_type = BTC_CXP_OFFE_WL; + } else if (bt->link_info.status.map.connect == 0) { + dm->wl_scc.ebt_null = 0; + policy_type = BTC_CXP_OFFE_2GISOB; + } else if (bt->link_info.a2dp_desc.exist && + dur < btc->bt_req_len) { + dm->wl_scc.ebt_null = 1; /* tx null at EBT */ + policy_type = BTC_CXP_OFFE_2GBWMIXB2; + } else if (bt->link_info.a2dp_desc.exist || + bt->link_info.pan_desc.exist) { + dm->wl_scc.ebt_null = 1; /* tx null at EBT */ + 
policy_type = BTC_CXP_OFFE_2GBWISOB; + } else { + dm->wl_scc.ebt_null = 0; + policy_type = BTC_CXP_OFFE_2GBWISOB; + } + break; + default: + break; + } + } + + _set_ant(rtwdev, NM_EXEC, BTC_PHY_ALL, BTC_ANT_W2G); + _set_policy(rtwdev, policy_type, BTC_ACT_WL_2G_SCC); +} + static void _action_wl_2g_ap(struct rtw89_dev *rtwdev) { struct rtw89_btc *btc = &rtwdev->btc; @@ -4518,6 +4617,156 @@ static void _update_wl_info_v1(struct rtw89_dev *rtwdev) _fw_set_drv_info(rtwdev, CXDRVINFO_ROLE); } +static void _update_wl_info_v2(struct rtw89_dev *rtwdev) +{ + struct rtw89_btc *btc = &rtwdev->btc; + struct rtw89_btc_wl_info *wl = &btc->cx.wl; + struct rtw89_btc_wl_link_info *wl_linfo = wl->link_info; + struct rtw89_btc_wl_role_info_v2 *wl_rinfo = &wl->role_info_v2; + struct rtw89_btc_wl_dbcc_info *wl_dinfo = &wl->dbcc_info; + u8 cnt_connect = 0, cnt_connecting = 0, cnt_active = 0; + u8 cnt_2g = 0, cnt_5g = 0, phy; + u32 wl_2g_ch[2] = {}, wl_5g_ch[2] = {}; + bool b2g = false, b5g = false, client_joined = false; + u8 i; + + memset(wl_rinfo, 0, sizeof(*wl_rinfo)); + + for (i = 0; i < RTW89_PORT_NUM; i++) { + if (!wl_linfo[i].active) + continue; + + cnt_active++; + wl_rinfo->active_role_v2[cnt_active - 1].role = wl_linfo[i].role; + wl_rinfo->active_role_v2[cnt_active - 1].pid = wl_linfo[i].pid; + wl_rinfo->active_role_v2[cnt_active - 1].phy = wl_linfo[i].phy; + wl_rinfo->active_role_v2[cnt_active - 1].band = wl_linfo[i].band; + wl_rinfo->active_role_v2[cnt_active - 1].noa = (u8)wl_linfo[i].noa; + wl_rinfo->active_role_v2[cnt_active - 1].connected = 0; + + wl->port_id[wl_linfo[i].role] = wl_linfo[i].pid; + + phy = wl_linfo[i].phy; + + if (rtwdev->dbcc_en && phy < RTW89_PHY_MAX) { + wl_dinfo->role[phy] = wl_linfo[i].role; + wl_dinfo->op_band[phy] = wl_linfo[i].band; + _update_dbcc_band(rtwdev, phy); + _fw_set_drv_info(rtwdev, CXDRVINFO_DBCC); + } + + if (wl_linfo[i].connected == MLME_NO_LINK) { + continue; + } else if (wl_linfo[i].connected == MLME_LINKING) { + cnt_connecting++; + } else { + cnt_connect++; + if ((wl_linfo[i].role == RTW89_WIFI_ROLE_P2P_GO || + wl_linfo[i].role == RTW89_WIFI_ROLE_AP) && + wl_linfo[i].client_cnt > 1) + client_joined = true; + } + + wl_rinfo->role_map.val |= BIT(wl_linfo[i].role); + wl_rinfo->active_role_v2[cnt_active - 1].ch = wl_linfo[i].ch; + wl_rinfo->active_role_v2[cnt_active - 1].bw = wl_linfo[i].bw; + wl_rinfo->active_role_v2[cnt_active - 1].connected = 1; + + /* only care 2 roles + BT coex */ + if (wl_linfo[i].band != RTW89_BAND_2G) { + if (cnt_5g <= ARRAY_SIZE(wl_5g_ch) - 1) + wl_5g_ch[cnt_5g] = wl_linfo[i].ch; + cnt_5g++; + b5g = true; + } else { + if (cnt_2g <= ARRAY_SIZE(wl_2g_ch) - 1) + wl_2g_ch[cnt_2g] = wl_linfo[i].ch; + cnt_2g++; + b2g = true; + } + } + + wl_rinfo->connect_cnt = cnt_connect; + + /* Be careful to change the following sequence!! 
*/ + if (cnt_connect == 0) { + wl_rinfo->link_mode = BTC_WLINK_NOLINK; + wl_rinfo->role_map.role.none = 1; + } else if (!b2g && b5g) { + wl_rinfo->link_mode = BTC_WLINK_5G; + } else if (wl_rinfo->role_map.role.nan) { + wl_rinfo->link_mode = BTC_WLINK_2G_NAN; + } else if (cnt_connect > BTC_TDMA_WLROLE_MAX) { + wl_rinfo->link_mode = BTC_WLINK_OTHER; + } else if (b2g && b5g && cnt_connect == 2) { + if (rtwdev->dbcc_en) { + switch (wl_dinfo->role[RTW89_PHY_0]) { + case RTW89_WIFI_ROLE_STATION: + wl_rinfo->link_mode = BTC_WLINK_2G_STA; + break; + case RTW89_WIFI_ROLE_P2P_GO: + wl_rinfo->link_mode = BTC_WLINK_2G_GO; + break; + case RTW89_WIFI_ROLE_P2P_CLIENT: + wl_rinfo->link_mode = BTC_WLINK_2G_GC; + break; + case RTW89_WIFI_ROLE_AP: + wl_rinfo->link_mode = BTC_WLINK_2G_AP; + break; + default: + wl_rinfo->link_mode = BTC_WLINK_OTHER; + break; + } + } else { + wl_rinfo->link_mode = BTC_WLINK_25G_MCC; + } + } else if (!b5g && cnt_connect == 2) { + if (wl_rinfo->role_map.role.station && + (wl_rinfo->role_map.role.p2p_go || + wl_rinfo->role_map.role.p2p_gc || + wl_rinfo->role_map.role.ap)) { + if (wl_2g_ch[0] == wl_2g_ch[1]) + wl_rinfo->link_mode = BTC_WLINK_2G_SCC; + else + wl_rinfo->link_mode = BTC_WLINK_2G_MCC; + } else { + wl_rinfo->link_mode = BTC_WLINK_2G_MCC; + } + } else if (!b5g && cnt_connect == 1) { + if (wl_rinfo->role_map.role.station) + wl_rinfo->link_mode = BTC_WLINK_2G_STA; + else if (wl_rinfo->role_map.role.ap) + wl_rinfo->link_mode = BTC_WLINK_2G_AP; + else if (wl_rinfo->role_map.role.p2p_go) + wl_rinfo->link_mode = BTC_WLINK_2G_GO; + else if (wl_rinfo->role_map.role.p2p_gc) + wl_rinfo->link_mode = BTC_WLINK_2G_GC; + else + wl_rinfo->link_mode = BTC_WLINK_OTHER; + } + + /* if no client_joined, don't care P2P-GO/AP role */ + if (wl_rinfo->role_map.role.p2p_go || wl_rinfo->role_map.role.ap) { + if (!client_joined) { + if (wl_rinfo->link_mode == BTC_WLINK_2G_SCC || + wl_rinfo->link_mode == BTC_WLINK_2G_MCC) { + wl_rinfo->link_mode = BTC_WLINK_2G_STA; + wl_rinfo->connect_cnt = 1; + } else if (wl_rinfo->link_mode == BTC_WLINK_2G_GO || + wl_rinfo->link_mode == BTC_WLINK_2G_AP) { + wl_rinfo->link_mode = BTC_WLINK_NOLINK; + wl_rinfo->connect_cnt = 0; + } + } + } + + rtw89_debug(rtwdev, RTW89_DBG_BTC, + "[BTC], cnt_connect = %d, connecting = %d, link_mode = %d\n", + cnt_connect, cnt_connecting, wl_rinfo->link_mode); + + _fw_set_drv_info(rtwdev, CXDRVINFO_ROLE); +} + #define BTC_CHK_HANG_MAX 3 #define BTC_SCB_INV_VALUE GENMASK(31, 0) @@ -4676,6 +4925,7 @@ void _run_coex(struct rtw89_dev *rtwdev, enum btc_reason_and_action reason) struct rtw89_btc_bt_info *bt = &btc->cx.bt; struct rtw89_btc_wl_role_info *wl_rinfo = &wl->role_info; struct rtw89_btc_wl_role_info_v1 *wl_rinfo_v1 = &wl->role_info_v1; + struct rtw89_btc_wl_role_info_v2 *wl_rinfo_v2 = &wl->role_info_v2; u8 mode; lockdep_assert_held(&rtwdev->mutex); @@ -4686,8 +4936,12 @@ void _run_coex(struct rtw89_dev *rtwdev, enum btc_reason_and_action reason) if (ver->fwlrole == 0) mode = wl_rinfo->link_mode; - else + else if (ver->fwlrole == 1) mode = wl_rinfo_v1->link_mode; + else if (ver->fwlrole == 2) + mode = wl_rinfo_v2->link_mode; + else + return; rtw89_debug(rtwdev, RTW89_DBG_BTC, "[BTC], %s(): reason=%d, mode=%d\n", __func__, reason, mode); @@ -4812,6 +5066,8 @@ void _run_coex(struct rtw89_dev *rtwdev, enum btc_reason_and_action reason) _action_wl_2g_scc(rtwdev); else if (ver->fwlrole == 1) _action_wl_2g_scc_v1(rtwdev); + else if (ver->fwlrole == 2) + _action_wl_2g_scc_v2(rtwdev); break; case BTC_WLINK_2G_MCC: bt->scan_rx_low_pri = 
true; @@ -5317,8 +5573,10 @@ void rtw89_btc_ntfy_role_info(struct rtw89_dev *rtwdev, struct rtw89_vif *rtwvif memcpy(wlinfo, &r, sizeof(*wlinfo)); if (ver->fwlrole == 0) _update_wl_info(rtwdev); - else + else if (ver->fwlrole == 1) _update_wl_info_v1(rtwdev); + else if (ver->fwlrole == 2) + _update_wl_info_v2(rtwdev); if (wlinfo->role == RTW89_WIFI_ROLE_STATION && wlinfo->connected == MLME_NO_LINK) @@ -5838,6 +6096,7 @@ static void _show_wl_info(struct rtw89_dev *rtwdev, struct seq_file *m) struct rtw89_btc_wl_info *wl = &cx->wl; struct rtw89_btc_wl_role_info *wl_rinfo = &wl->role_info; struct rtw89_btc_wl_role_info_v1 *wl_rinfo_v1 = &wl->role_info_v1; + struct rtw89_btc_wl_role_info_v2 *wl_rinfo_v2 = &wl->role_info_v2; u8 mode; if (!(btc->dm.coex_info_map & BTC_COEX_INFO_WL)) @@ -5847,8 +6106,12 @@ static void _show_wl_info(struct rtw89_dev *rtwdev, struct seq_file *m) if (ver->fwlrole == 0) mode = wl_rinfo->link_mode; - else + else if (ver->fwlrole == 1) mode = wl_rinfo_v1->link_mode; + else if (ver->fwlrole == 2) + mode = wl_rinfo_v2->link_mode; + else + return; seq_printf(m, " %-15s : link_mode:%d, ", "[status]", mode); diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index f998f8a3f2774..f51cd86651922 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -1180,6 +1180,22 @@ struct rtw89_btc_wl_active_role_v1 { u32 noa_duration; /* ms */ }; +struct rtw89_btc_wl_active_role_v2 { + u8 connected: 1; + u8 pid: 3; + u8 phy: 1; + u8 noa: 1; + u8 band: 2; + + u8 client_ps: 1; + u8 bw: 7; + + u8 role; + u8 ch; + + u32 noa_duration; /* ms */ +}; + struct rtw89_btc_wl_role_info_bpos { u16 none: 1; u16 station: 1; @@ -1228,6 +1244,21 @@ struct rtw89_btc_wl_role_info_v1 { /* struct size must be n*4 bytes */ u32 rsvd: 27; }; +struct rtw89_btc_wl_role_info_v2 { /* struct size must be n*4 bytes */ + u8 connect_cnt; + u8 link_mode; + union rtw89_btc_wl_role_info_map role_map; + struct rtw89_btc_wl_active_role_v2 active_role_v2[RTW89_PORT_NUM]; + u32 mrole_type; /* btc_wl_mrole_type */ + u32 mrole_noa_duration; /* ms */ + + u32 dbcc_en: 1; + u32 dbcc_chg: 1; + u32 dbcc_2g_phy: 2; /* which phy operate in 2G, HW_PHY_0 or HW_PHY_1 */ + u32 link_mode_chg: 1; + u32 rsvd: 27; +}; + struct rtw89_btc_wl_ver_info { u32 fw_coex; /* match with which coex_ver */ u32 fw; @@ -1343,6 +1374,7 @@ struct rtw89_btc_wl_info { struct rtw89_btc_wl_afh_info afh_info; struct rtw89_btc_wl_role_info role_info; struct rtw89_btc_wl_role_info_v1 role_info_v1; + struct rtw89_btc_wl_role_info_v2 role_info_v2; struct rtw89_btc_wl_scan_info scan_info; struct rtw89_btc_wl_dbcc_info dbcc_info; struct rtw89_btc_rf_para rf_para; diff --git a/drivers/net/wireless/realtek/rtw89/fw.c b/drivers/net/wireless/realtek/rtw89/fw.c index 1a4ff24078fb9..46d9cda2c617c 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.c +++ b/drivers/net/wireless/realtek/rtw89/fw.c @@ -2038,6 +2038,92 @@ fail: return ret; } +#define H2C_LEN_CXDRVINFO_ROLE_SIZE_V2(max_role_num) \ + (4 + 8 * (max_role_num) + H2C_LEN_CXDRVINFO_ROLE_DBCC_LEN + H2C_LEN_CXDRVHDR) + +int rtw89_fw_h2c_cxdrv_role_v2(struct rtw89_dev *rtwdev) +{ + struct rtw89_btc *btc = &rtwdev->btc; + const struct rtw89_btc_ver *ver = btc->ver; + struct rtw89_btc_wl_info *wl = &btc->cx.wl; + struct rtw89_btc_wl_role_info_v2 *role_info = &wl->role_info_v2; + struct rtw89_btc_wl_role_info_bpos *bpos = &role_info->role_map.role; + struct rtw89_btc_wl_active_role_v2 *active = role_info->active_role_v2; + struct 
sk_buff *skb; + u32 len; + u8 *cmd, offset; + int ret; + int i; + + len = H2C_LEN_CXDRVINFO_ROLE_SIZE_V2(ver->max_role_num); + + skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, len); + if (!skb) { + rtw89_err(rtwdev, "failed to alloc skb for h2c cxdrv_role\n"); + return -ENOMEM; + } + skb_put(skb, len); + cmd = skb->data; + + RTW89_SET_FWCMD_CXHDR_TYPE(cmd, CXDRVINFO_ROLE); + RTW89_SET_FWCMD_CXHDR_LEN(cmd, len - H2C_LEN_CXDRVHDR); + + RTW89_SET_FWCMD_CXROLE_CONNECT_CNT(cmd, role_info->connect_cnt); + RTW89_SET_FWCMD_CXROLE_LINK_MODE(cmd, role_info->link_mode); + + RTW89_SET_FWCMD_CXROLE_ROLE_NONE(cmd, bpos->none); + RTW89_SET_FWCMD_CXROLE_ROLE_STA(cmd, bpos->station); + RTW89_SET_FWCMD_CXROLE_ROLE_AP(cmd, bpos->ap); + RTW89_SET_FWCMD_CXROLE_ROLE_VAP(cmd, bpos->vap); + RTW89_SET_FWCMD_CXROLE_ROLE_ADHOC(cmd, bpos->adhoc); + RTW89_SET_FWCMD_CXROLE_ROLE_ADHOC_MASTER(cmd, bpos->adhoc_master); + RTW89_SET_FWCMD_CXROLE_ROLE_MESH(cmd, bpos->mesh); + RTW89_SET_FWCMD_CXROLE_ROLE_MONITOR(cmd, bpos->moniter); + RTW89_SET_FWCMD_CXROLE_ROLE_P2P_DEV(cmd, bpos->p2p_device); + RTW89_SET_FWCMD_CXROLE_ROLE_P2P_GC(cmd, bpos->p2p_gc); + RTW89_SET_FWCMD_CXROLE_ROLE_P2P_GO(cmd, bpos->p2p_go); + RTW89_SET_FWCMD_CXROLE_ROLE_NAN(cmd, bpos->nan); + + offset = PORT_DATA_OFFSET; + for (i = 0; i < RTW89_PORT_NUM; i++, active++) { + RTW89_SET_FWCMD_CXROLE_ACT_CONNECTED_V2(cmd, active->connected, i, offset); + RTW89_SET_FWCMD_CXROLE_ACT_PID_V2(cmd, active->pid, i, offset); + RTW89_SET_FWCMD_CXROLE_ACT_PHY_V2(cmd, active->phy, i, offset); + RTW89_SET_FWCMD_CXROLE_ACT_NOA_V2(cmd, active->noa, i, offset); + RTW89_SET_FWCMD_CXROLE_ACT_BAND_V2(cmd, active->band, i, offset); + RTW89_SET_FWCMD_CXROLE_ACT_CLIENT_PS_V2(cmd, active->client_ps, i, offset); + RTW89_SET_FWCMD_CXROLE_ACT_BW_V2(cmd, active->bw, i, offset); + RTW89_SET_FWCMD_CXROLE_ACT_ROLE_V2(cmd, active->role, i, offset); + RTW89_SET_FWCMD_CXROLE_ACT_CH_V2(cmd, active->ch, i, offset); + RTW89_SET_FWCMD_CXROLE_ACT_NOA_DUR_V2(cmd, active->noa_duration, i, offset); + } + + offset = len - H2C_LEN_CXDRVINFO_ROLE_DBCC_LEN; + RTW89_SET_FWCMD_CXROLE_MROLE_TYPE(cmd, role_info->mrole_type, offset); + RTW89_SET_FWCMD_CXROLE_MROLE_NOA(cmd, role_info->mrole_noa_duration, offset); + RTW89_SET_FWCMD_CXROLE_DBCC_EN(cmd, role_info->dbcc_en, offset); + RTW89_SET_FWCMD_CXROLE_DBCC_CHG(cmd, role_info->dbcc_chg, offset); + RTW89_SET_FWCMD_CXROLE_DBCC_2G_PHY(cmd, role_info->dbcc_2g_phy, offset); + RTW89_SET_FWCMD_CXROLE_LINK_MODE_CHG(cmd, role_info->link_mode_chg, offset); + + rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C, + H2C_CAT_OUTSRC, BTFC_SET, + SET_DRV_INFO, 0, 0, + len); + + ret = rtw89_h2c_tx(rtwdev, skb, false); + if (ret) { + rtw89_err(rtwdev, "failed to send h2c\n"); + goto fail; + } + + return 0; +fail: + dev_kfree_skb_any(skb); + + return ret; +} + #define H2C_LEN_CXDRVINFO_CTRL (4 + H2C_LEN_CXDRVHDR) int rtw89_fw_h2c_cxdrv_ctrl(struct rtw89_dev *rtwdev) { diff --git a/drivers/net/wireless/realtek/rtw89/fw.h b/drivers/net/wireless/realtek/rtw89/fw.h index 3f6e0871381df..fa9e786ad7c9a 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.h +++ b/drivers/net/wireless/realtek/rtw89/fw.h @@ -2393,6 +2393,56 @@ static inline void RTW89_SET_FWCMD_CXROLE_ACT_NOA_DUR(void *cmd, u32 val, int n, le32p_replace_bits((__le32 *)((u8 *)cmd + (20 + (12 + offset) * n)), val, GENMASK(31, 0)); } +static inline void RTW89_SET_FWCMD_CXROLE_ACT_CONNECTED_V2(void *cmd, u8 val, int n, u8 offset) +{ + u8p_replace_bits((u8 *)cmd + (6 + (12 + offset) * n), val, BIT(0)); +} + +static inline void 
RTW89_SET_FWCMD_CXROLE_ACT_PID_V2(void *cmd, u8 val, int n, u8 offset) +{ + u8p_replace_bits((u8 *)cmd + (6 + (12 + offset) * n), val, GENMASK(3, 1)); +} + +static inline void RTW89_SET_FWCMD_CXROLE_ACT_PHY_V2(void *cmd, u8 val, int n, u8 offset) +{ + u8p_replace_bits((u8 *)cmd + (6 + (12 + offset) * n), val, BIT(4)); +} + +static inline void RTW89_SET_FWCMD_CXROLE_ACT_NOA_V2(void *cmd, u8 val, int n, u8 offset) +{ + u8p_replace_bits((u8 *)cmd + (6 + (12 + offset) * n), val, BIT(5)); +} + +static inline void RTW89_SET_FWCMD_CXROLE_ACT_BAND_V2(void *cmd, u8 val, int n, u8 offset) +{ + u8p_replace_bits((u8 *)cmd + (6 + (12 + offset) * n), val, GENMASK(7, 6)); +} + +static inline void RTW89_SET_FWCMD_CXROLE_ACT_CLIENT_PS_V2(void *cmd, u8 val, int n, u8 offset) +{ + u8p_replace_bits((u8 *)cmd + (7 + (12 + offset) * n), val, BIT(0)); +} + +static inline void RTW89_SET_FWCMD_CXROLE_ACT_BW_V2(void *cmd, u8 val, int n, u8 offset) +{ + u8p_replace_bits((u8 *)cmd + (7 + (12 + offset) * n), val, GENMASK(7, 1)); +} + +static inline void RTW89_SET_FWCMD_CXROLE_ACT_ROLE_V2(void *cmd, u8 val, int n, u8 offset) +{ + u8p_replace_bits((u8 *)cmd + (8 + (12 + offset) * n), val, GENMASK(7, 0)); +} + +static inline void RTW89_SET_FWCMD_CXROLE_ACT_CH_V2(void *cmd, u8 val, int n, u8 offset) +{ + u8p_replace_bits((u8 *)cmd + (9 + (12 + offset) * n), val, GENMASK(7, 0)); +} + +static inline void RTW89_SET_FWCMD_CXROLE_ACT_NOA_DUR_V2(void *cmd, u32 val, int n, u8 offset) +{ + le32p_replace_bits((__le32 *)((u8 *)cmd + (10 + (12 + offset) * n)), val, GENMASK(31, 0)); +} + static inline void RTW89_SET_FWCMD_CXROLE_MROLE_TYPE(void *cmd, u32 val, u8 offset) { le32p_replace_bits((__le32 *)((u8 *)cmd + offset), val, GENMASK(31, 0)); @@ -3505,6 +3555,7 @@ int rtw89_fw_h2c_ra(struct rtw89_dev *rtwdev, struct rtw89_ra_info *ra, bool csi int rtw89_fw_h2c_cxdrv_init(struct rtw89_dev *rtwdev); int rtw89_fw_h2c_cxdrv_role(struct rtw89_dev *rtwdev); int rtw89_fw_h2c_cxdrv_role_v1(struct rtw89_dev *rtwdev); +int rtw89_fw_h2c_cxdrv_role_v2(struct rtw89_dev *rtwdev); int rtw89_fw_h2c_cxdrv_ctrl(struct rtw89_dev *rtwdev); int rtw89_fw_h2c_cxdrv_rfk(struct rtw89_dev *rtwdev); int rtw89_fw_h2c_del_pkt_offload(struct rtw89_dev *rtwdev, u8 id); -- GitLab From a2c0ce5d01a2218af4756d311ae91845b67ac5b9 Mon Sep 17 00:00:00 2001 From: Ching-Te Ku Date: Wed, 8 Mar 2023 13:32:21 +0800 Subject: [PATCH 0374/2078] wifi: rtw89: coex: Add traffic TX/RX info and its H2C There is a new mechanism which can do some real time performance tuning for WiFi and BT. This TX/RX info is a condition provide to firmware to do traffic analysis. 
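The new CXDRVINFO_TRX command is a fixed byte layout filled in with the pointer bitfield helpers from <linux/bitfield.h>. Shrunk to a toy three-field payload it looks like the sketch below; the offsets and field names are examples, not the real command format:

#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/types.h>

/* Toy layout: byte 2 = tx level, byte 3 = rx level,
 * bytes 4-5 = little-endian tx rate. */
static void pack_trx_sample(u8 *cmd, u8 tx_lvl, u8 rx_lvl, u16 tx_rate)
{
	u8p_replace_bits(cmd + 2, tx_lvl, GENMASK(7, 0));
	u8p_replace_bits(cmd + 3, rx_lvl, GENMASK(7, 0));
	le16p_replace_bits((__le16 *)(cmd + 4), tx_rate, GENMASK(15, 0));
}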
Signed-off-by: Ching-Te Ku Signed-off-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230308053225.24377-4-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/coex.c | 59 +++++++++++++-- drivers/net/wireless/realtek/rtw89/core.h | 43 +++++++++++ drivers/net/wireless/realtek/rtw89/fw.c | 56 +++++++++++++++ drivers/net/wireless/realtek/rtw89/fw.h | 87 +++++++++++++++++++++++ 4 files changed, 241 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/coex.c b/drivers/net/wireless/realtek/rtw89/coex.c index b56897529504a..908ef8bb149a0 100644 --- a/drivers/net/wireless/realtek/rtw89/coex.c +++ b/drivers/net/wireless/realtek/rtw89/coex.c @@ -1809,6 +1809,9 @@ static void _fw_set_drv_info(struct rtw89_dev *rtwdev, u8 type) { struct rtw89_btc *btc = &rtwdev->btc; const struct rtw89_btc_ver *ver = btc->ver; + struct rtw89_btc_dm *dm = &btc->dm; + struct rtw89_btc_wl_info *wl = &btc->cx.wl; + struct rtw89_btc_rf_trx_para rf_para = dm->rf_trx_para; switch (type) { case CXDRVINFO_INIT: @@ -1825,6 +1828,19 @@ static void _fw_set_drv_info(struct rtw89_dev *rtwdev, u8 type) case CXDRVINFO_CTRL: rtw89_fw_h2c_cxdrv_ctrl(rtwdev); break; + case CXDRVINFO_TRX: + dm->trx_info.tx_power = u32_get_bits(rf_para.wl_tx_power, + RTW89_BTC_WL_DEF_TX_PWR); + dm->trx_info.rx_gain = u32_get_bits(rf_para.wl_rx_gain, + RTW89_BTC_WL_DEF_TX_PWR); + dm->trx_info.bt_tx_power = u32_get_bits(rf_para.bt_tx_power, + RTW89_BTC_WL_DEF_TX_PWR); + dm->trx_info.bt_rx_gain = u32_get_bits(rf_para.bt_rx_gain, + RTW89_BTC_WL_DEF_TX_PWR); + dm->trx_info.cn = wl->cn_report; + dm->trx_info.nhm = wl->nhm.pwr; + rtw89_fw_h2c_cxdrv_trx(rtwdev); + break; case CXDRVINFO_RFK: rtw89_fw_h2c_cxdrv_rfk(rtwdev); break; @@ -5361,6 +5377,8 @@ void rtw89_btc_ntfy_icmp_packet_work(struct work_struct *work) mutex_unlock(&rtwdev->mutex); } +#define BT_PROFILE_PROTOCOL_MASK GENMASK(7, 4) + static void _update_bt_info(struct rtw89_dev *rtwdev, u8 *buf, u32 len) { const struct rtw89_chip_info *chip = rtwdev->chip; @@ -5417,6 +5435,7 @@ static void _update_bt_info(struct rtw89_dev *rtwdev, u8 *buf, u32 len) a2dp->exist = btinfo.lb2.a2dp; b->profile_cnt.now += (u8)a2dp->exist; pan->active = btinfo.lb2.pan; + btc->dm.trx_info.bt_profile = u32_get_bits(btinfo.val, BT_PROFILE_PROTOCOL_MASK); /* parse raw info low-Byte3 */ btinfo.val = bt->raw_info[BTC_BTINFO_L3]; @@ -5433,6 +5452,7 @@ static void _update_bt_info(struct rtw89_dev *rtwdev, u8 *buf, u32 len) btinfo.val = bt->raw_info[BTC_BTINFO_H0]; /* raw val is dBm unit, translate from -100~ 0dBm to 0~100%*/ b->rssi = chip->ops->btc_get_bt_rssi(rtwdev, btinfo.hb0.rssi); + btc->dm.trx_info.bt_rssi = b->rssi; /* parse raw info high-Byte1 */ btinfo.val = bt->raw_info[BTC_BTINFO_H1]; @@ -5766,6 +5786,8 @@ static void rtw89_btc_ntfy_wl_sta_iter(void *data, struct ieee80211_sta *sta) (struct rtw89_btc_wl_sta_iter_data *)data; struct rtw89_dev *rtwdev = iter_data->rtwdev; struct rtw89_btc *btc = &rtwdev->btc; + struct rtw89_btc_dm *dm = &btc->dm; + const struct rtw89_btc_ver *ver = btc->ver; struct rtw89_btc_wl_info *wl = &btc->cx.wl; struct rtw89_btc_wl_link_info *link_info = NULL; struct rtw89_sta *rtwsta = (struct rtw89_sta *)sta->drv_priv; @@ -5773,6 +5795,8 @@ static void rtw89_btc_ntfy_wl_sta_iter(void *data, struct ieee80211_sta *sta) struct rtw89_vif *rtwvif = rtwsta->rtwvif; struct rtw89_traffic_stats *stats = &rtwvif->stats; const struct rtw89_chip_info *chip = rtwdev->chip; + struct rtw89_btc_wl_role_info *r; + struct rtw89_btc_wl_role_info_v1 *r1; u32 
last_tx_rate, last_rx_rate; u16 last_tx_lvl, last_rx_lvl; u8 port = rtwvif->port; @@ -5849,10 +5873,33 @@ static void rtw89_btc_ntfy_wl_sta_iter(void *data, struct ieee80211_sta *sta) link_info_t->tx_rate = rtwsta->ra_report.hw_rate; link_info_t->rx_rate = rtwsta->rx_hw_rate; - wl->role_info.active_role[port].tx_lvl = (u16)stats->tx_tfc_lv; - wl->role_info.active_role[port].rx_lvl = (u16)stats->rx_tfc_lv; - wl->role_info.active_role[port].tx_rate = rtwsta->ra_report.hw_rate; - wl->role_info.active_role[port].rx_rate = rtwsta->rx_hw_rate; + if (link_info->role == RTW89_WIFI_ROLE_STATION || + link_info->role == RTW89_WIFI_ROLE_P2P_CLIENT) { + dm->trx_info.tx_rate = link_info_t->tx_rate; + dm->trx_info.rx_rate = link_info_t->rx_rate; + } + + if (ver->fwlrole == 0) { + r = &wl->role_info; + r->active_role[port].tx_lvl = stats->tx_tfc_lv; + r->active_role[port].rx_lvl = stats->rx_tfc_lv; + r->active_role[port].tx_rate = rtwsta->ra_report.hw_rate; + r->active_role[port].rx_rate = rtwsta->rx_hw_rate; + } else if (ver->fwlrole == 1) { + r1 = &wl->role_info_v1; + r1->active_role_v1[port].tx_lvl = stats->tx_tfc_lv; + r1->active_role_v1[port].rx_lvl = stats->rx_tfc_lv; + r1->active_role_v1[port].tx_rate = rtwsta->ra_report.hw_rate; + r1->active_role_v1[port].rx_rate = rtwsta->rx_hw_rate; + } else if (ver->fwlrole == 2) { + dm->trx_info.tx_lvl = stats->tx_tfc_lv; + dm->trx_info.rx_lvl = stats->rx_tfc_lv; + dm->trx_info.tx_rate = rtwsta->ra_report.hw_rate; + dm->trx_info.rx_rate = rtwsta->rx_hw_rate; + } + + dm->trx_info.tx_tp = link_info_t->tx_throughput; + dm->trx_info.rx_tp = link_info_t->rx_throughput; if (is_sta_change) iter_data->is_sta_change = true; @@ -5866,6 +5913,7 @@ static void rtw89_btc_ntfy_wl_sta_iter(void *data, struct ieee80211_sta *sta) void rtw89_btc_ntfy_wl_sta(struct rtw89_dev *rtwdev) { struct rtw89_btc *btc = &rtwdev->btc; + struct rtw89_btc_dm *dm = &btc->dm; struct rtw89_btc_wl_info *wl = &btc->cx.wl; struct rtw89_btc_wl_sta_iter_data data = {.rtwdev = rtwdev}; u8 i; @@ -5884,6 +5932,9 @@ void rtw89_btc_ntfy_wl_sta(struct rtw89_dev *rtwdev) } } + if (dm->trx_info.wl_rssi != wl->rssi_level) + dm->trx_info.wl_rssi = wl->rssi_level; + rtw89_debug(rtwdev, RTW89_DBG_BTC, "[BTC], %s(): busy=%d\n", __func__, !!wl->status.map.busy); diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index f51cd86651922..024441d45a66c 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -1367,6 +1367,22 @@ struct rtw89_btc_rf_para { u32 rx_gain_perpkt; }; +struct rtw89_btc_wl_nhm { + u8 instant_wl_nhm_dbm; + u8 instant_wl_nhm_per_mhz; + u16 valid_record_times; + s8 record_pwr[16]; + u8 record_ratio[16]; + s8 pwr; /* dbm_per_MHz */ + u8 ratio; + u8 current_status; + u8 refresh; + bool start_flag; + u8 last_ccx_rpt_stamp; + s8 pwr_max; + s8 pwr_min; +}; + struct rtw89_btc_wl_info { struct rtw89_btc_wl_link_info link_info[RTW89_PORT_NUM]; struct rtw89_btc_wl_rfk_info rfk_info; @@ -1378,10 +1394,12 @@ struct rtw89_btc_wl_info { struct rtw89_btc_wl_scan_info scan_info; struct rtw89_btc_wl_dbcc_info dbcc_info; struct rtw89_btc_rf_para rf_para; + struct rtw89_btc_wl_nhm nhm; union rtw89_btc_wl_state_map status; u8 port_id[RTW89_WIFI_ROLE_MLME_MAX]; u8 rssi_level; + u8 cn_report; bool scbd_change; u32 scbd; @@ -2019,6 +2037,30 @@ struct rtw89_btc_rf_trx_para { u8 bt_rx_gain; /* LNA constrain level */ }; +struct rtw89_btc_trx_info { + u8 tx_lvl; + u8 rx_lvl; + u8 wl_rssi; + u8 bt_rssi; + + s8 tx_power; /* 
absolute Tx power (dBm), 0xff-> no BTC control */ + s8 rx_gain; /* rx gain table index (TBD.) */ + s8 bt_tx_power; /* decrease Tx power (dB) */ + s8 bt_rx_gain; /* LNA constrain level */ + + u8 cn; /* condition_num */ + s8 nhm; + u8 bt_profile; + u8 rsvd2; + + u16 tx_rate; + u16 rx_rate; + + u32 tx_tp; + u32 rx_tp; + u32 rx_err_ratio; +}; + struct rtw89_btc_dm { struct rtw89_btc_fbtc_slot slot[CXST_MAX]; struct rtw89_btc_fbtc_slot slot_now[CXST_MAX]; @@ -2030,6 +2072,7 @@ struct rtw89_btc_dm { struct rtw89_btc_wl_tx_limit_para wl_tx_limit; struct rtw89_btc_dm_step dm_step; struct rtw89_btc_wl_scc_ctrl wl_scc; + struct rtw89_btc_trx_info trx_info; union rtw89_btc_dm_error_map error; u32 cnt_dm[BTC_DCNT_NUM]; u32 cnt_notify[BTC_NCNT_NUM]; diff --git a/drivers/net/wireless/realtek/rtw89/fw.c b/drivers/net/wireless/realtek/rtw89/fw.c index 46d9cda2c617c..713aefd0cf8dd 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.c +++ b/drivers/net/wireless/realtek/rtw89/fw.c @@ -2169,6 +2169,62 @@ fail: return ret; } +#define H2C_LEN_CXDRVINFO_TRX (28 + H2C_LEN_CXDRVHDR) +int rtw89_fw_h2c_cxdrv_trx(struct rtw89_dev *rtwdev) +{ + struct rtw89_btc *btc = &rtwdev->btc; + struct rtw89_btc_trx_info *trx = &btc->dm.trx_info; + struct sk_buff *skb; + u8 *cmd; + int ret; + + skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, H2C_LEN_CXDRVINFO_TRX); + if (!skb) { + rtw89_err(rtwdev, "failed to alloc skb for h2c cxdrv_trx\n"); + return -ENOMEM; + } + skb_put(skb, H2C_LEN_CXDRVINFO_TRX); + cmd = skb->data; + + RTW89_SET_FWCMD_CXHDR_TYPE(cmd, CXDRVINFO_TRX); + RTW89_SET_FWCMD_CXHDR_LEN(cmd, H2C_LEN_CXDRVINFO_TRX - H2C_LEN_CXDRVHDR); + + RTW89_SET_FWCMD_CXTRX_TXLV(cmd, trx->tx_lvl); + RTW89_SET_FWCMD_CXTRX_RXLV(cmd, trx->rx_lvl); + RTW89_SET_FWCMD_CXTRX_WLRSSI(cmd, trx->wl_rssi); + RTW89_SET_FWCMD_CXTRX_BTRSSI(cmd, trx->bt_rssi); + RTW89_SET_FWCMD_CXTRX_TXPWR(cmd, trx->tx_power); + RTW89_SET_FWCMD_CXTRX_RXGAIN(cmd, trx->rx_gain); + RTW89_SET_FWCMD_CXTRX_BTTXPWR(cmd, trx->bt_tx_power); + RTW89_SET_FWCMD_CXTRX_BTRXGAIN(cmd, trx->bt_rx_gain); + RTW89_SET_FWCMD_CXTRX_CN(cmd, trx->cn); + RTW89_SET_FWCMD_CXTRX_NHM(cmd, trx->nhm); + RTW89_SET_FWCMD_CXTRX_BTPROFILE(cmd, trx->bt_profile); + RTW89_SET_FWCMD_CXTRX_RSVD2(cmd, trx->rsvd2); + RTW89_SET_FWCMD_CXTRX_TXRATE(cmd, trx->tx_rate); + RTW89_SET_FWCMD_CXTRX_RXRATE(cmd, trx->rx_rate); + RTW89_SET_FWCMD_CXTRX_TXTP(cmd, trx->tx_tp); + RTW89_SET_FWCMD_CXTRX_RXTP(cmd, trx->rx_tp); + RTW89_SET_FWCMD_CXTRX_RXERRRA(cmd, trx->rx_err_ratio); + + rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C, + H2C_CAT_OUTSRC, BTFC_SET, + SET_DRV_INFO, 0, 0, + H2C_LEN_CXDRVINFO_TRX); + + ret = rtw89_h2c_tx(rtwdev, skb, false); + if (ret) { + rtw89_err(rtwdev, "failed to send h2c\n"); + goto fail; + } + + return 0; +fail: + dev_kfree_skb_any(skb); + + return ret; +} + #define H2C_LEN_CXDRVINFO_RFK (4 + H2C_LEN_CXDRVHDR) int rtw89_fw_h2c_cxdrv_rfk(struct rtw89_dev *rtwdev) { diff --git a/drivers/net/wireless/realtek/rtw89/fw.h b/drivers/net/wireless/realtek/rtw89/fw.h index fa9e786ad7c9a..c3c67ddf61a24 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.h +++ b/drivers/net/wireless/realtek/rtw89/fw.h @@ -2152,6 +2152,7 @@ enum rtw89_btc_cxdrvinfo { CXDRVINFO_RUN, CXDRVINFO_CTRL, CXDRVINFO_SCAN, + CXDRVINFO_TRX, /* WL traffic to WL fw */ CXDRVINFO_MAX, }; @@ -2493,6 +2494,91 @@ static inline void RTW89_SET_FWCMD_CXCTRL_TRACE_STEP(void *cmd, u32 val) le32p_replace_bits((__le32 *)((u8 *)(cmd) + 2), val, GENMASK(18, 3)); } +static inline void RTW89_SET_FWCMD_CXTRX_TXLV(void *cmd, u8 val) +{ + 
u8p_replace_bits((u8 *)cmd + 2, val, GENMASK(7, 0)); +} + +static inline void RTW89_SET_FWCMD_CXTRX_RXLV(void *cmd, u8 val) +{ + u8p_replace_bits((u8 *)cmd + 3, val, GENMASK(7, 0)); +} + +static inline void RTW89_SET_FWCMD_CXTRX_WLRSSI(void *cmd, u8 val) +{ + u8p_replace_bits((u8 *)cmd + 4, val, GENMASK(7, 0)); +} + +static inline void RTW89_SET_FWCMD_CXTRX_BTRSSI(void *cmd, u8 val) +{ + u8p_replace_bits((u8 *)cmd + 5, val, GENMASK(7, 0)); +} + +static inline void RTW89_SET_FWCMD_CXTRX_TXPWR(void *cmd, s8 val) +{ + u8p_replace_bits((u8 *)cmd + 6, val, GENMASK(7, 0)); +} + +static inline void RTW89_SET_FWCMD_CXTRX_RXGAIN(void *cmd, s8 val) +{ + u8p_replace_bits((u8 *)cmd + 7, val, GENMASK(7, 0)); +} + +static inline void RTW89_SET_FWCMD_CXTRX_BTTXPWR(void *cmd, s8 val) +{ + u8p_replace_bits((u8 *)cmd + 8, val, GENMASK(7, 0)); +} + +static inline void RTW89_SET_FWCMD_CXTRX_BTRXGAIN(void *cmd, s8 val) +{ + u8p_replace_bits((u8 *)cmd + 9, val, GENMASK(7, 0)); +} + +static inline void RTW89_SET_FWCMD_CXTRX_CN(void *cmd, u8 val) +{ + u8p_replace_bits((u8 *)cmd + 10, val, GENMASK(7, 0)); +} + +static inline void RTW89_SET_FWCMD_CXTRX_NHM(void *cmd, s8 val) +{ + u8p_replace_bits((u8 *)cmd + 11, val, GENMASK(7, 0)); +} + +static inline void RTW89_SET_FWCMD_CXTRX_BTPROFILE(void *cmd, u8 val) +{ + u8p_replace_bits((u8 *)cmd + 12, val, GENMASK(7, 0)); +} + +static inline void RTW89_SET_FWCMD_CXTRX_RSVD2(void *cmd, u8 val) +{ + u8p_replace_bits((u8 *)cmd + 13, val, GENMASK(7, 0)); +} + +static inline void RTW89_SET_FWCMD_CXTRX_TXRATE(void *cmd, u16 val) +{ + le16p_replace_bits((__le16 *)((u8 *)cmd + 14), val, GENMASK(15, 0)); +} + +static inline void RTW89_SET_FWCMD_CXTRX_RXRATE(void *cmd, u16 val) +{ + le16p_replace_bits((__le16 *)((u8 *)cmd + 16), val, GENMASK(15, 0)); +} + +static inline void RTW89_SET_FWCMD_CXTRX_TXTP(void *cmd, u32 val) +{ + le32p_replace_bits((__le32 *)((u8 *)cmd + 18), val, GENMASK(31, 0)); +} + +static inline void RTW89_SET_FWCMD_CXTRX_RXTP(void *cmd, u32 val) +{ + le32p_replace_bits((__le32 *)((u8 *)cmd + 22), val, GENMASK(31, 0)); +} + +static inline void RTW89_SET_FWCMD_CXTRX_RXERRRA(void *cmd, u32 val) +{ + le32p_replace_bits((__le32 *)((u8 *)cmd + 26), val, GENMASK(31, 0)); +} + static inline void RTW89_SET_FWCMD_CXRFK_STATE(void *cmd, u32 val) { le32p_replace_bits((__le32 *)((u8 *)(cmd) + 2), val, GENMASK(1, 0)); @@ -3557,6 +3643,7 @@ int rtw89_fw_h2c_cxdrv_role(struct rtw89_dev *rtwdev); int rtw89_fw_h2c_cxdrv_role_v1(struct rtw89_dev *rtwdev); int rtw89_fw_h2c_cxdrv_role_v2(struct rtw89_dev *rtwdev); int rtw89_fw_h2c_cxdrv_ctrl(struct rtw89_dev *rtwdev); +int rtw89_fw_h2c_cxdrv_trx(struct rtw89_dev *rtwdev); int rtw89_fw_h2c_cxdrv_rfk(struct rtw89_dev *rtwdev); int rtw89_fw_h2c_del_pkt_offload(struct rtw89_dev *rtwdev, u8 id); int rtw89_fw_h2c_add_pkt_offload(struct rtw89_dev *rtwdev, u8 *id, -- GitLab From e5e52feb5053a537180cf928428deb8bc697a42e Mon Sep 17 00:00:00 2001 From: Ching-Te Ku Date: Wed, 8 Mar 2023 13:32:22 +0800 Subject: [PATCH 0375/2078] wifi: rtw89: coex: Add register monitor report v2 format The v2 firmware report reduce its maximum register numbers from 30 to 20, it can help to save firmware code size. 
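Since the two report layouts differ only in the length of the value array, the driver can keep a single union and pick the expected length per firmware version. A stripped-down sketch, with illustrative names and the 30-vs-20 register sizes taken from the patch:

#include <linux/types.h>

#define MREG_MAX_V1	30
#define MREG_MAX_V2	20

struct mreg_val_v1 {
	u8 fver;
	u8 reg_num;
	__le16 rsvd;
	__le32 val[MREG_MAX_V1];
} __packed;

struct mreg_val_v2 {
	u8 fver;
	u8 reg_num;
	__le16 rsvd;
	__le32 val[MREG_MAX_V2];
} __packed;

union mreg_val {
	struct mreg_val_v1 v1;
	struct mreg_val_v2 v2;
};

/* Expected report length depends on the firmware format version;
 * 0 signals an unsupported version to the caller. */
static size_t mreg_req_len(u8 fcxmreg, const union mreg_val *finfo)
{
	switch (fcxmreg) {
	case 1: return sizeof(finfo->v1);
	case 2: return sizeof(finfo->v2);
	default: return 0;
	}
}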
Signed-off-by: Ching-Te Ku Signed-off-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230308053225.24377-5-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/coex.c | 148 ++++++++++++++++++++-- drivers/net/wireless/realtek/rtw89/core.h | 17 ++- 2 files changed, 151 insertions(+), 14 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/coex.c b/drivers/net/wireless/realtek/rtw89/coex.c index 908ef8bb149a0..436d5e35ae49b 100644 --- a/drivers/net/wireless/realtek/rtw89/coex.c +++ b/drivers/net/wireless/realtek/rtw89/coex.c @@ -226,7 +226,6 @@ struct rtw89_btc_btf_set_slot_table { u8 buf[]; } __packed; -#define BTF_SET_MON_REG_VER 1 struct rtw89_btc_btf_set_mon_reg { u8 fver; u8 reg_num; @@ -1078,8 +1077,15 @@ static u32 _chk_btc_report(struct rtw89_dev *rtwdev, break; case BTC_RPT_TYPE_MREG: pcinfo = &pfwinfo->rpt_fbtc_mregval.cinfo; - pfinfo = &pfwinfo->rpt_fbtc_mregval.finfo; - pcinfo->req_len = sizeof(pfwinfo->rpt_fbtc_mregval.finfo); + if (ver->fcxmreg == 1) { + pfinfo = &pfwinfo->rpt_fbtc_mregval.finfo.v1; + pcinfo->req_len = sizeof(pfwinfo->rpt_fbtc_mregval.finfo.v1); + } else if (ver->fcxmreg == 2) { + pfinfo = &pfwinfo->rpt_fbtc_mregval.finfo.v2; + pcinfo->req_len = sizeof(pfwinfo->rpt_fbtc_mregval.finfo.v2); + } else { + goto err; + } pcinfo->req_fver = ver->fcxmreg; break; case BTC_RPT_TYPE_GPIO_DBG: @@ -1709,18 +1715,26 @@ static void rtw89_btc_fw_set_slots(struct rtw89_dev *rtwdev, u8 num, static void btc_fw_set_monreg(struct rtw89_dev *rtwdev) { const struct rtw89_chip_info *chip = rtwdev->chip; + const struct rtw89_btc_ver *ver = rtwdev->btc.ver; struct rtw89_btc_btf_set_mon_reg *monreg = NULL; - u8 n, *ptr = NULL, ulen; + u8 n, *ptr = NULL, ulen, cxmreg_max; u16 sz = 0; n = chip->mon_reg_num; - rtw89_debug(rtwdev, RTW89_DBG_BTC, "[BTC], %s(): mon_reg_num=%d\n", __func__, n); - if (n > CXMREG_MAX) { + + if (ver->fcxmreg == 1) + cxmreg_max = CXMREG_MAX; + else if (ver->fcxmreg == 2) + cxmreg_max = CXMREG_MAX_V2; + else + return; + + if (n > cxmreg_max) { rtw89_debug(rtwdev, RTW89_DBG_BTC, "[BTC], %s(): mon reg count %d > %d\n", - __func__, n, CXMREG_MAX); + __func__, n, cxmreg_max); return; } @@ -1730,7 +1744,7 @@ static void btc_fw_set_monreg(struct rtw89_dev *rtwdev) if (!monreg) return; - monreg->fver = BTF_SET_MON_REG_VER; + monreg->fver = ver->fcxmreg; monreg->reg_num = n; ptr = &monreg->buf[0]; memcpy(ptr, chip->mon_reg, n * ulen); @@ -7401,13 +7415,13 @@ static void _get_gnt(struct rtw89_dev *rtwdev, struct rtw89_mac_ax_coex_gnt *gnt } } -static void _show_mreg(struct rtw89_dev *rtwdev, struct seq_file *m) +static void _show_mreg_v1(struct rtw89_dev *rtwdev, struct seq_file *m) { const struct rtw89_chip_info *chip = rtwdev->chip; struct rtw89_btc *btc = &rtwdev->btc; struct rtw89_btc_btf_fwinfo *pfwinfo = &btc->fwinfo; struct rtw89_btc_rpt_cmn_info *pcinfo = NULL; - struct rtw89_btc_fbtc_mreg_val *pmreg = NULL; + struct rtw89_btc_fbtc_mreg_val_v1 *pmreg = NULL; struct rtw89_btc_fbtc_gpio_dbg *gdbg = NULL; struct rtw89_btc_cx *cx = &btc->cx; struct rtw89_btc_wl_info *wl = &btc->cx.wl; @@ -7457,7 +7471,7 @@ static void _show_mreg(struct rtw89_dev *rtwdev, struct seq_file *m) return; } - pmreg = &pfwinfo->rpt_fbtc_mregval.finfo; + pmreg = &pfwinfo->rpt_fbtc_mregval.finfo.v1; rtw89_debug(rtwdev, RTW89_DBG_BTC, "[BTC], %s(): rpt_fbtc_mregval reg_num = %d\n", __func__, pmreg->reg_num); @@ -7486,6 +7500,111 @@ static void _show_mreg(struct rtw89_dev *rtwdev, struct seq_file *m) rtw89_debug(rtwdev, RTW89_DBG_BTC, "[BTC], %s(): stop due 
rpt_fbtc_gpio_dbg.cinfo\n", __func__); + seq_puts(m, "\n"); + return; + } + + gdbg = &pfwinfo->rpt_fbtc_gpio_dbg.finfo; + if (!gdbg->en_map) + return; + + seq_printf(m, " %-15s : enable_map:0x%08x", + "[gpio_dbg]", gdbg->en_map); + + for (i = 0; i < BTC_DBG_MAX1; i++) { + if (!(gdbg->en_map & BIT(i))) + continue; + seq_printf(m, ", %d->GPIO%d", (u32)i, gdbg->gpio_map[i]); + } + seq_puts(m, "\n"); +} + +static void _show_mreg_v2(struct rtw89_dev *rtwdev, struct seq_file *m) +{ + const struct rtw89_chip_info *chip = rtwdev->chip; + struct rtw89_btc *btc = &rtwdev->btc; + struct rtw89_btc_btf_fwinfo *pfwinfo = &btc->fwinfo; + struct rtw89_btc_rpt_cmn_info *pcinfo = NULL; + struct rtw89_btc_fbtc_mreg_val_v2 *pmreg = NULL; + struct rtw89_btc_fbtc_gpio_dbg *gdbg = NULL; + struct rtw89_btc_cx *cx = &btc->cx; + struct rtw89_btc_wl_info *wl = &btc->cx.wl; + struct rtw89_btc_bt_info *bt = &btc->cx.bt; + struct rtw89_mac_ax_coex_gnt gnt_cfg = {}; + struct rtw89_mac_ax_gnt gnt; + u8 i = 0, type = 0, cnt = 0; + u32 val, offset; + + if (!(btc->dm.coex_info_map & BTC_COEX_INFO_MREG)) + return; + + seq_puts(m, "========== [HW Status] ==========\n"); + + seq_printf(m, + " %-15s : WL->BT:0x%08x(cnt:%d), BT->WL:0x%08x(total:%d, bt_update:%d)\n", + "[scoreboard]", wl->scbd, cx->cnt_wl[BTC_WCNT_SCBDUPDATE], + bt->scbd, cx->cnt_bt[BTC_BCNT_SCBDREAD], + cx->cnt_bt[BTC_BCNT_SCBDUPDATE]); + + /* To avoid I/O if WL LPS or power-off */ + if (!wl->status.map.lps && !wl->status.map.rf_off) { + btc->dm.pta_owner = rtw89_mac_get_ctrl_path(rtwdev); + + _get_gnt(rtwdev, &gnt_cfg); + gnt = gnt_cfg.band[0]; + seq_printf(m, + " %-15s : pta_owner:%s, phy-0[gnt_wl:%s-%d/gnt_bt:%s-%d], ", + "[gnt_status]", + chip->chip_id == RTL8852C ? "HW" : + btc->dm.pta_owner == BTC_CTRL_BY_WL ? "WL" : "BT", + gnt.gnt_wl_sw_en ? "SW" : "HW", gnt.gnt_wl, + gnt.gnt_bt_sw_en ? "SW" : "HW", gnt.gnt_bt); + + gnt = gnt_cfg.band[1]; + seq_printf(m, "phy-1[gnt_wl:%s-%d/gnt_bt:%s-%d]\n", + gnt.gnt_wl_sw_en ? "SW" : "HW", + gnt.gnt_wl, + gnt.gnt_bt_sw_en ? 
"SW" : "HW", + gnt.gnt_bt); + } + pcinfo = &pfwinfo->rpt_fbtc_mregval.cinfo; + if (!pcinfo->valid) { + rtw89_debug(rtwdev, RTW89_DBG_BTC, + "[BTC], %s(): stop due rpt_fbtc_mregval.cinfo\n", + __func__); + return; + } + + pmreg = &pfwinfo->rpt_fbtc_mregval.finfo.v2; + rtw89_debug(rtwdev, RTW89_DBG_BTC, + "[BTC], %s(): rpt_fbtc_mregval reg_num = %d\n", + __func__, pmreg->reg_num); + + for (i = 0; i < pmreg->reg_num; i++) { + type = (u8)le16_to_cpu(chip->mon_reg[i].type); + offset = le32_to_cpu(chip->mon_reg[i].offset); + val = le32_to_cpu(pmreg->mreg_val[i]); + + if (cnt % 6 == 0) + seq_printf(m, " %-15s : %d_0x%04x=0x%08x", + "[reg]", (u32)type, offset, val); + else + seq_printf(m, ", %d_0x%04x=0x%08x", (u32)type, + offset, val); + if (cnt % 6 == 5) + seq_puts(m, "\n"); + cnt++; + + if (i >= pmreg->reg_num) + seq_puts(m, "\n"); + } + + pcinfo = &pfwinfo->rpt_fbtc_gpio_dbg.cinfo; + if (!pcinfo->valid) { + rtw89_debug(rtwdev, RTW89_DBG_BTC, + "[BTC], %s(): stop due rpt_fbtc_gpio_dbg.cinfo\n", + __func__); + seq_puts(m, "\n"); return; } @@ -7868,7 +7987,12 @@ void rtw89_btc_dump_info(struct rtw89_dev *rtwdev, struct seq_file *m) _show_bt_info(rtwdev, m); _show_dm_info(rtwdev, m); _show_fw_dm_msg(rtwdev, m); - _show_mreg(rtwdev, m); + + if (ver->fcxmreg == 1) + _show_mreg_v1(rtwdev, m); + else if (ver->fcxmreg == 2) + _show_mreg_v2(rtwdev, m); + if (ver->fcxbtcrpt == 1) _show_summary_v1(rtwdev, m); else if (ver->fcxbtcrpt == 4) diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index 024441d45a66c..b0525f258d3c3 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -1524,6 +1524,7 @@ union rtw89_btc_fbtc_tdma_le32 { }; #define CXMREG_MAX 30 +#define CXMREG_MAX_V2 20 #define FCXMAX_STEP 255 /*STEP trace record cnt, Max:65535, default:255*/ #define BTC_CYCLE_SLOT_MAX 48 /* must be even number, non-zero */ @@ -1750,13 +1751,25 @@ struct rtw89_btc_fbtc_gpio_dbg { u8 gpio_map[BTC_DBG_MAX1]; /*the debug signals to GPIO-Position */ } __packed; -struct rtw89_btc_fbtc_mreg_val { +struct rtw89_btc_fbtc_mreg_val_v1 { u8 fver; /* btc_ver::fcxmreg */ u8 reg_num; __le16 rsvd; __le32 mreg_val[CXMREG_MAX]; } __packed; +struct rtw89_btc_fbtc_mreg_val_v2 { + u8 fver; /* btc_ver::fcxmreg */ + u8 reg_num; + __le16 rsvd; + __le32 mreg_val[CXMREG_MAX_V2]; +} __packed; + +union rtw89_btc_fbtc_mreg_val { + struct rtw89_btc_fbtc_mreg_val_v1 v1; + struct rtw89_btc_fbtc_mreg_val_v2 v2; +}; + #define RTW89_DEF_FBTC_MREG(__type, __bytes, __offset) \ { .type = cpu_to_le16(__type), .bytes = cpu_to_le16(__bytes), \ .offset = cpu_to_le32(__offset), } @@ -2203,7 +2216,7 @@ struct rtw89_btc_rpt_fbtc_nullsta { struct rtw89_btc_rpt_fbtc_mreg { struct rtw89_btc_rpt_cmn_info cinfo; /* common info, by driver */ - struct rtw89_btc_fbtc_mreg_val finfo; /* info from fw */ + union rtw89_btc_fbtc_mreg_val finfo; /* info from fw */ }; struct rtw89_btc_rpt_fbtc_gpio_dbg { -- GitLab From 9dfa09e0628d2024ce4574f645344c00fe88a535 Mon Sep 17 00:00:00 2001 From: Ching-Te Ku Date: Wed, 8 Mar 2023 13:32:23 +0800 Subject: [PATCH 0376/2078] wifi: rtw89: coex: Fix wrong structure assignment at null data report Correct pointer assignment of v1 null data report. It doesn't really change logic at all, but it looks more readable. 
Signed-off-by: Ching-Te Ku Signed-off-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230308053225.24377-6-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/coex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtw89/coex.c b/drivers/net/wireless/realtek/rtw89/coex.c index 436d5e35ae49b..0ca621a9f80a1 100644 --- a/drivers/net/wireless/realtek/rtw89/coex.c +++ b/drivers/net/wireless/realtek/rtw89/coex.c @@ -1065,7 +1065,7 @@ static u32 _chk_btc_report(struct rtw89_dev *rtwdev, case BTC_RPT_TYPE_NULLSTA: pcinfo = &pfwinfo->rpt_fbtc_nullsta.cinfo; if (ver->fcxnullsta == 1) { - pfinfo = &pfwinfo->rpt_fbtc_nullsta.finfo; + pfinfo = &pfwinfo->rpt_fbtc_nullsta.finfo.v1; pcinfo->req_len = sizeof(pfwinfo->rpt_fbtc_nullsta.finfo.v1); } else if (ver->fcxnullsta == 2) { pfinfo = &pfwinfo->rpt_fbtc_nullsta.finfo.v2; -- GitLab From 262cc19ea902d2280e2e2a56b153f37466e3349e Mon Sep 17 00:00:00 2001 From: Ching-Te Ku Date: Wed, 8 Mar 2023 13:32:24 +0800 Subject: [PATCH 0377/2078] wifi: rtw89: coex: Add v2 Bluetooth scan info Compare to v1 and v2 removed some not usable parameters. Save firmware code size. The information can show how frequent and how long the Bluetooth scan do. It will help to debug coexistence issue. Signed-off-by: Ching-Te Ku Signed-off-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230308053225.24377-7-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/coex.c | 67 +++++++++++++++++++++-- drivers/net/wireless/realtek/rtw89/core.h | 63 +++++++++++++++------ 2 files changed, 106 insertions(+), 24 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/coex.c b/drivers/net/wireless/realtek/rtw89/coex.c index 0ca621a9f80a1..b867e54feb896 100644 --- a/drivers/net/wireless/realtek/rtw89/coex.c +++ b/drivers/net/wireless/realtek/rtw89/coex.c @@ -890,13 +890,15 @@ static void _update_bt_report(struct rtw89_dev *rtwdev, u8 rpt_type, u8 *pfinfo) struct rtw89_btc_bt_link_info *bt_linfo = &bt->link_info; struct rtw89_btc_bt_a2dp_desc *a2dp = &bt_linfo->a2dp_desc; struct rtw89_btc_fbtc_btver *pver = NULL; - struct rtw89_btc_fbtc_btscan *pscan = NULL; + struct rtw89_btc_fbtc_btscan_v1 *pscan_v1; + struct rtw89_btc_fbtc_btscan_v2 *pscan_v2; struct rtw89_btc_fbtc_btafh *pafh_v1 = NULL; struct rtw89_btc_fbtc_btafh_v2 *pafh_v2 = NULL; struct rtw89_btc_fbtc_btdevinfo *pdev = NULL; + bool scan_update = true; + int i; pver = (struct rtw89_btc_fbtc_btver *)pfinfo; - pscan = (struct rtw89_btc_fbtc_btscan *)pfinfo; pdev = (struct rtw89_btc_fbtc_btdevinfo *)pfinfo; rtw89_debug(rtwdev, RTW89_DBG_BTC, @@ -910,7 +912,26 @@ static void _update_bt_report(struct rtw89_dev *rtwdev, u8 rpt_type, u8 *pfinfo) bt->feature = le32_to_cpu(pver->feature); break; case BTC_RPT_TYPE_BT_SCAN: - memcpy(bt->scan_info, pscan->scan, BTC_SCAN_MAX1); + if (ver->fcxbtscan == 1) { + pscan_v1 = (struct rtw89_btc_fbtc_btscan_v1 *)pfinfo; + for (i = 0; i < BTC_SCAN_MAX1; i++) { + bt->scan_info_v1[i] = pscan_v1->scan[i]; + if (bt->scan_info_v1[i].win == 0 && + bt->scan_info_v1[i].intvl == 0) + scan_update = false; + } + } else if (ver->fcxbtscan == 2) { + pscan_v2 = (struct rtw89_btc_fbtc_btscan_v2 *)pfinfo; + for (i = 0; i < CXSCAN_MAX; i++) { + bt->scan_info_v2[i] = pscan_v2->para[i]; + if ((pscan_v2->type & BIT(i)) && + pscan_v2->para[i].win == 0 && + pscan_v2->para[i].intvl == 0) + scan_update = false; + } + } + if (scan_update) + bt->scan_info_update = 1; break; case BTC_RPT_TYPE_BT_AFH: if (ver->fcxbtafh == 2) { 
@@ -1102,8 +1123,13 @@ static u32 _chk_btc_report(struct rtw89_dev *rtwdev, break; case BTC_RPT_TYPE_BT_SCAN: pcinfo = &pfwinfo->rpt_fbtc_btscan.cinfo; - pfinfo = &pfwinfo->rpt_fbtc_btscan.finfo; - pcinfo->req_len = sizeof(pfwinfo->rpt_fbtc_btscan.finfo); + if (ver->fcxbtscan == 1) { + pfinfo = &pfwinfo->rpt_fbtc_btscan.finfo.v1; + pcinfo->req_len = sizeof(pfwinfo->rpt_fbtc_btscan.finfo.v1); + } else if (ver->fcxbtscan == 2) { + pfinfo = &pfwinfo->rpt_fbtc_btscan.finfo.v2; + pcinfo->req_len = sizeof(pfwinfo->rpt_fbtc_btscan.finfo.v2); + } pcinfo->req_fver = ver->fcxbtscan; break; case BTC_RPT_TYPE_BT_AFH: @@ -6346,11 +6372,40 @@ static void _show_bt_info(struct rtw89_dev *rtwdev, struct seq_file *m) cx->cnt_bt[BTC_BCNT_INFOSAME]); seq_printf(m, - " %-15s : Hi-rx = %d, Hi-tx = %d, Lo-rx = %d, Lo-tx = %d (bt_polut_wl_tx = %d)\n", + " %-15s : Hi-rx = %d, Hi-tx = %d, Lo-rx = %d, Lo-tx = %d (bt_polut_wl_tx = %d)", "[trx_req_cnt]", cx->cnt_bt[BTC_BCNT_HIPRI_RX], cx->cnt_bt[BTC_BCNT_HIPRI_TX], cx->cnt_bt[BTC_BCNT_LOPRI_RX], cx->cnt_bt[BTC_BCNT_LOPRI_TX], cx->cnt_bt[BTC_BCNT_POLUT]); + if (!bt->scan_info_update) { + rtw89_btc_fw_en_rpt(rtwdev, RPT_EN_BT_SCAN_INFO, true); + seq_puts(m, "\n"); + } else { + rtw89_btc_fw_en_rpt(rtwdev, RPT_EN_BT_SCAN_INFO, false); + if (ver->fcxbtscan == 1) { + seq_printf(m, + "(INQ:%d-%d/PAGE:%d-%d/LE:%d-%d/INIT:%d-%d)", + le16_to_cpu(bt->scan_info_v1[BTC_SCAN_INQ].win), + le16_to_cpu(bt->scan_info_v1[BTC_SCAN_INQ].intvl), + le16_to_cpu(bt->scan_info_v1[BTC_SCAN_PAGE].win), + le16_to_cpu(bt->scan_info_v1[BTC_SCAN_PAGE].intvl), + le16_to_cpu(bt->scan_info_v1[BTC_SCAN_BLE].win), + le16_to_cpu(bt->scan_info_v1[BTC_SCAN_BLE].intvl), + le16_to_cpu(bt->scan_info_v1[BTC_SCAN_INIT].win), + le16_to_cpu(bt->scan_info_v1[BTC_SCAN_INIT].intvl)); + } else if (ver->fcxbtscan == 2) { + seq_printf(m, + "(BG:%d-%d/INIT:%d-%d/LE:%d-%d)", + le16_to_cpu(bt->scan_info_v2[CXSCAN_BG].win), + le16_to_cpu(bt->scan_info_v2[CXSCAN_BG].intvl), + le16_to_cpu(bt->scan_info_v2[CXSCAN_INIT].win), + le16_to_cpu(bt->scan_info_v2[CXSCAN_INIT].intvl), + le16_to_cpu(bt->scan_info_v2[CXSCAN_LE].win), + le16_to_cpu(bt->scan_info_v2[CXSCAN_LE].intvl)); + } + seq_puts(m, "\n"); + } + if (bt->enable.now && bt->ver_info.fw == 0) rtw89_btc_fw_en_rpt(rtwdev, RPT_EN_BT_VER_INFO, true); else diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index b0525f258d3c3..02fb3c73b1373 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -1445,14 +1445,6 @@ struct rtw89_btc_wl_tx_limit_para { u16 tx_retry; }; -struct rtw89_btc_bt_scan_info { - u16 win; - u16 intvl; - u32 enable: 1; - u32 interlace: 1; - u32 rsvd: 30; -}; - enum rtw89_btc_bt_scan_type { BTC_SCAN_INQ = 0, BTC_SCAN_PAGE, @@ -1463,9 +1455,50 @@ enum rtw89_btc_bt_scan_type { BTC_SCAN_MAX1, }; +enum rtw89_btc_ble_scan_type { + CXSCAN_BG = 0, + CXSCAN_INIT, + CXSCAN_LE, + CXSCAN_MAX +}; + +#define RTW89_BTC_BTC_SCAN_V1_FLAG_ENABLE BIT(0) +#define RTW89_BTC_BTC_SCAN_V1_FLAG_INTERLACE BIT(1) + +struct rtw89_btc_bt_scan_info_v1 { + __le16 win; + __le16 intvl; + __le32 flags; +} __packed; + +struct rtw89_btc_bt_scan_info_v2 { + __le16 win; + __le16 intvl; +} __packed; + +struct rtw89_btc_fbtc_btscan_v1 { + u8 fver; /* btc_ver::fcxbtscan */ + u8 rsvd; + __le16 rsvd2; + struct rtw89_btc_bt_scan_info_v1 scan[BTC_SCAN_MAX1]; +} __packed; + +struct rtw89_btc_fbtc_btscan_v2 { + u8 fver; /* btc_ver::fcxbtscan */ + u8 type; + __le16 rsvd2; + struct rtw89_btc_bt_scan_info_v2 
para[CXSCAN_MAX]; +} __packed; + +union rtw89_btc_fbtc_btscan { + struct rtw89_btc_fbtc_btscan_v1 v1; + struct rtw89_btc_fbtc_btscan_v2 v2; +}; + struct rtw89_btc_bt_info { struct rtw89_btc_bt_link_info link_info; - struct rtw89_btc_bt_scan_info scan_info[BTC_SCAN_MAX1]; + struct rtw89_btc_bt_scan_info_v1 scan_info_v1[BTC_SCAN_MAX1]; + struct rtw89_btc_bt_scan_info_v2 scan_info_v2[CXSCAN_MAX]; struct rtw89_btc_bt_ver_info ver_info; struct rtw89_btc_bool_sta_chg enable; struct rtw89_btc_bool_sta_chg inq_pag; @@ -1488,7 +1521,8 @@ struct rtw89_btc_bt_info { u32 run_patch_code: 1; u32 hi_lna_rx: 1; u32 scan_rx_low_pri: 1; - u32 rsvd: 21; + u32 scan_info_update: 1; + u32 rsvd: 20; }; struct rtw89_btc_cx { @@ -2006,13 +2040,6 @@ struct rtw89_btc_fbtc_btver { __le32 feature; } __packed; -struct rtw89_btc_fbtc_btscan { - u8 fver; /* btc_ver::fcxbtscan */ - u8 rsvd; - __le16 rsvd2; - u8 scan[6]; -} __packed; - struct rtw89_btc_fbtc_btafh { u8 fver; /* btc_ver::fcxbtafh */ u8 rsvd; @@ -2231,7 +2258,7 @@ struct rtw89_btc_rpt_fbtc_btver { struct rtw89_btc_rpt_fbtc_btscan { struct rtw89_btc_rpt_cmn_info cinfo; /* common info, by driver */ - struct rtw89_btc_fbtc_btscan finfo; /* info from fw */ + union rtw89_btc_fbtc_btscan finfo; /* info from fw */ }; struct rtw89_btc_rpt_fbtc_btafh { -- GitLab From 3ab7f9b90cc0a737e0bd8a312dc48814c4682867 Mon Sep 17 00:00:00 2001 From: Ching-Te Ku Date: Wed, 8 Mar 2023 13:32:25 +0800 Subject: [PATCH 0378/2078] wifi: rtw89: coex: Add v5 firmware cycle status report To support the v5 firmware cycle report, add the related structures and functions. The v5 cycle report adds a group of status fields that show how the free-run/TDMA training is going. This is a firmware mechanism that can automatically switch the coexistence mode between TDMA and free-run for the 3-antenna solution, and the v5 report provides more reference data for that decision.
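One piece of the new v5 handling, visible in the hunks below, is a drift check: the driver compares the slot duration it programmed against the average duration the firmware reports and feeds the absolute difference into its error counters. The standalone C sketch below shows only that arithmetic; the names, units and threshold are illustrative and not taken from the driver.

#include <stdint.h>
#include <stdio.h>

/* Absolute difference between the programmed slot time and the average the
 * firmware actually measured; a large, persistent difference suggests the
 * schedule is drifting.
 */
static uint16_t slot_drift(uint16_t set_time, uint16_t real_time)
{
	return set_time > real_time ? set_time - real_time : real_time - set_time;
}

int main(void)
{
	uint16_t wl_slot_set = 300;	/* duration the driver programmed */
	uint16_t wl_slot_real = 268;	/* average from the cycle report */
	uint16_t drift_threshold = 20;	/* illustrative limit only */
	uint16_t drift = slot_drift(wl_slot_set, wl_slot_real);

	printf("WL slot drift: %u (%s)\n", drift,
	       drift > drift_threshold ? "flagged" : "ok");
	return 0;
}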
Signed-off-by: Ching-Te Ku Signed-off-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230308053225.24377-8-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/coex.c | 194 +++++++++++++++++++++- drivers/net/wireless/realtek/rtw89/coex.h | 5 + drivers/net/wireless/realtek/rtw89/core.h | 52 ++++++ 3 files changed, 249 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/coex.c b/drivers/net/wireless/realtek/rtw89/coex.c index b867e54feb896..3dbd4ee14c70d 100644 --- a/drivers/net/wireless/realtek/rtw89/coex.c +++ b/drivers/net/wireless/realtek/rtw89/coex.c @@ -987,8 +987,8 @@ static u32 _chk_btc_report(struct rtw89_dev *rtwdev, void *rpt_content = NULL, *pfinfo = NULL; u8 rpt_type = 0; u16 wl_slot_set = 0, wl_slot_real = 0; - u32 trace_step = btc->ctrl.trace_step, rpt_len = 0, diff_t; - u32 cnt_leak_slot = 0, bt_slot_real = 0, cnt_rx_imr = 0; + u32 trace_step = btc->ctrl.trace_step, rpt_len = 0, diff_t = 0; + u32 cnt_leak_slot, bt_slot_real, bt_slot_set, cnt_rx_imr; u8 i; rtw89_debug(rtwdev, RTW89_DBG_BTC, @@ -1061,6 +1061,10 @@ static u32 _chk_btc_report(struct rtw89_dev *rtwdev, pfinfo = &pfwinfo->rpt_fbtc_cysta.finfo.v4; pcysta->v4 = pfwinfo->rpt_fbtc_cysta.finfo.v4; pcinfo->req_len = sizeof(pfwinfo->rpt_fbtc_cysta.finfo.v4); + } else if (ver->fcxcysta == 5) { + pfinfo = &pfwinfo->rpt_fbtc_cysta.finfo.v5; + pcysta->v5 = pfwinfo->rpt_fbtc_cysta.finfo.v5; + pcinfo->req_len = sizeof(pfwinfo->rpt_fbtc_cysta.finfo.v5); } else { goto err; } @@ -1406,6 +1410,54 @@ static u32 _chk_btc_report(struct rtw89_dev *rtwdev, le16_to_cpu(pcysta->v4.slot_cnt[CXST_B1])); _chk_btc_err(rtwdev, BTC_DCNT_CYCLE_HANG, le16_to_cpu(pcysta->v4.cycles)); + } else if (ver->fcxcysta == 5) { + if (dm->fddt_train == BTC_FDDT_ENABLE) + break; + cnt_leak_slot = le16_to_cpu(pcysta->v5.slot_cnt[CXST_LK]); + cnt_rx_imr = le32_to_cpu(pcysta->v5.leak_slot.cnt_rximr); + + /* Check Leak-AP */ + if (cnt_leak_slot != 0 && cnt_rx_imr != 0 && + dm->tdma_now.rxflctrl) { + if (le16_to_cpu(pcysta->v5.cycles) >= BTC_CYSTA_CHK_PERIOD && + cnt_leak_slot < BTC_LEAK_AP_TH * cnt_rx_imr) + dm->leak_ap = 1; + } + + /* Check diff time between real WL slot and W1 slot */ + if (dm->tdma_now.type == CXTDMA_OFF) { + wl_slot_set = le16_to_cpu(dm->slot_now[CXST_W1].dur); + wl_slot_real = le16_to_cpu(pcysta->v5.cycle_time.tavg[CXT_WL]); + + if (wl_slot_real > wl_slot_set) + diff_t = wl_slot_real - wl_slot_set; + else + diff_t = wl_slot_set - wl_slot_real; + } + _chk_btc_err(rtwdev, BTC_DCNT_WL_SLOT_DRIFT, diff_t); + + /* Check diff time between real BT slot and EBT/E5G slot */ + bt_slot_set = btc->bt_req_len; + bt_slot_real = le16_to_cpu(pcysta->v5.cycle_time.tavg[CXT_BT]); + diff_t = 0; + if (dm->tdma_now.type == CXTDMA_OFF && + dm->tdma_now.ext_ctrl == CXECTL_EXT && + bt_slot_set != 0) { + if (bt_slot_set > bt_slot_real) + diff_t = bt_slot_set - bt_slot_real; + else + diff_t = bt_slot_real - bt_slot_set; + } + + _chk_btc_err(rtwdev, BTC_DCNT_BT_SLOT_DRIFT, diff_t); + _chk_btc_err(rtwdev, BTC_DCNT_E2G_HANG, + le16_to_cpu(pcysta->v5.slot_cnt[CXST_E2G])); + _chk_btc_err(rtwdev, BTC_DCNT_W1_HANG, + le16_to_cpu(pcysta->v5.slot_cnt[CXST_W1])); + _chk_btc_err(rtwdev, BTC_DCNT_B1_HANG, + le16_to_cpu(pcysta->v5.slot_cnt[CXST_B1])); + _chk_btc_err(rtwdev, BTC_DCNT_CYCLE_HANG, + le16_to_cpu(pcysta->v5.cycles)); } else { goto err; } @@ -5039,6 +5091,7 @@ void _run_coex(struct rtw89_dev *rtwdev, enum btc_reason_and_action reason) } dm->cnt_dm[BTC_DCNT_RUN]++; + dm->fddt_train = BTC_FDDT_DISABLE; if 
(btc->ctrl.always_freerun) { _action_freerun(rtwdev); @@ -6727,6 +6780,10 @@ static void _show_error(struct rtw89_dev *rtwdev, struct seq_file *m) pcysta->v4 = pfwinfo->rpt_fbtc_cysta.finfo.v4; except_cnt = pcysta->v4.except_cnt; exception_map = le32_to_cpu(pcysta->v4.except_map); + } else if (ver->fcxcysta == 5) { + pcysta->v5 = pfwinfo->rpt_fbtc_cysta.finfo.v5; + except_cnt = pcysta->v5.except_cnt; + exception_map = le32_to_cpu(pcysta->v5.except_map); } else { return; } @@ -7215,6 +7272,137 @@ static void _show_fbtc_cysta_v4(struct rtw89_dev *rtwdev, struct seq_file *m) } } +static void _show_fbtc_cysta_v5(struct rtw89_dev *rtwdev, struct seq_file *m) +{ + struct rtw89_btc *btc = &rtwdev->btc; + struct rtw89_btc_bt_a2dp_desc *a2dp = &btc->cx.bt.link_info.a2dp_desc; + struct rtw89_btc_btf_fwinfo *pfwinfo = &btc->fwinfo; + struct rtw89_btc_dm *dm = &btc->dm; + struct rtw89_btc_fbtc_a2dp_trx_stat_v4 *a2dp_trx; + struct rtw89_btc_fbtc_cysta_v5 *pcysta; + struct rtw89_btc_rpt_cmn_info *pcinfo; + u8 i, cnt = 0, slot_pair, divide_cnt; + u16 cycle, c_begin, c_end, store_index; + + pcinfo = &pfwinfo->rpt_fbtc_cysta.cinfo; + if (!pcinfo->valid) + return; + + pcysta = &pfwinfo->rpt_fbtc_cysta.finfo.v5; + seq_printf(m, + " %-15s : cycle:%d, bcn[all:%d/all_ok:%d/bt:%d/bt_ok:%d]", + "[cycle_cnt]", + le16_to_cpu(pcysta->cycles), + le16_to_cpu(pcysta->bcn_cnt[CXBCN_ALL]), + le16_to_cpu(pcysta->bcn_cnt[CXBCN_ALL_OK]), + le16_to_cpu(pcysta->bcn_cnt[CXBCN_BT_SLOT]), + le16_to_cpu(pcysta->bcn_cnt[CXBCN_BT_OK])); + + for (i = 0; i < CXST_MAX; i++) { + if (!le16_to_cpu(pcysta->slot_cnt[i])) + continue; + + seq_printf(m, ", %s:%d", id_to_slot(i), + le16_to_cpu(pcysta->slot_cnt[i])); + } + + if (dm->tdma_now.rxflctrl) + seq_printf(m, ", leak_rx:%d", + le32_to_cpu(pcysta->leak_slot.cnt_rximr)); + + if (pcysta->collision_cnt) + seq_printf(m, ", collision:%d", pcysta->collision_cnt); + + if (le16_to_cpu(pcysta->skip_cnt)) + seq_printf(m, ", skip:%d", + le16_to_cpu(pcysta->skip_cnt)); + + seq_puts(m, "\n"); + + seq_printf(m, " %-15s : avg_t[wl:%d/bt:%d/lk:%d.%03d]", + "[cycle_time]", + le16_to_cpu(pcysta->cycle_time.tavg[CXT_WL]), + le16_to_cpu(pcysta->cycle_time.tavg[CXT_BT]), + le16_to_cpu(pcysta->leak_slot.tavg) / 1000, + le16_to_cpu(pcysta->leak_slot.tavg) % 1000); + seq_printf(m, + ", max_t[wl:%d/bt:%d/lk:%d.%03d]\n", + le16_to_cpu(pcysta->cycle_time.tmax[CXT_WL]), + le16_to_cpu(pcysta->cycle_time.tmax[CXT_BT]), + le16_to_cpu(pcysta->leak_slot.tmax) / 1000, + le16_to_cpu(pcysta->leak_slot.tmax) % 1000); + + cycle = le16_to_cpu(pcysta->cycles); + if (cycle <= 1) + return; + + /* 1 cycle record 1 wl-slot and 1 bt-slot */ + slot_pair = BTC_CYCLE_SLOT_MAX / 2; + + if (cycle <= slot_pair) + c_begin = 1; + else + c_begin = cycle - slot_pair + 1; + + c_end = cycle; + + if (a2dp->exist) + divide_cnt = 3; + else + divide_cnt = BTC_CYCLE_SLOT_MAX / 4; + + if (c_begin > c_end) + return; + + for (cycle = c_begin; cycle <= c_end; cycle++) { + cnt++; + store_index = ((cycle - 1) % slot_pair) * 2; + + if (cnt % divide_cnt == 1) + seq_printf(m, " %-15s : ", "[cycle_step]"); + + seq_printf(m, "->b%02d", + le16_to_cpu(pcysta->slot_step_time[store_index])); + if (a2dp->exist) { + a2dp_trx = &pcysta->a2dp_trx[store_index]; + seq_printf(m, "(%d/%d/%dM/%d/%d/%d)", + a2dp_trx->empty_cnt, + a2dp_trx->retry_cnt, + a2dp_trx->tx_rate ? 
3 : 2, + a2dp_trx->tx_cnt, + a2dp_trx->ack_cnt, + a2dp_trx->nack_cnt); + } + seq_printf(m, "->w%02d", + le16_to_cpu(pcysta->slot_step_time[store_index + 1])); + if (a2dp->exist) { + a2dp_trx = &pcysta->a2dp_trx[store_index + 1]; + seq_printf(m, "(%d/%d/%dM/%d/%d/%d)", + a2dp_trx->empty_cnt, + a2dp_trx->retry_cnt, + a2dp_trx->tx_rate ? 3 : 2, + a2dp_trx->tx_cnt, + a2dp_trx->ack_cnt, + a2dp_trx->nack_cnt); + } + if (cnt % divide_cnt == 0 || cnt == c_end) + seq_puts(m, "\n"); + } + + if (a2dp->exist) { + seq_printf(m, " %-15s : a2dp_ept:%d, a2dp_late:%d", + "[a2dp_t_sta]", + le16_to_cpu(pcysta->a2dp_ept.cnt), + le16_to_cpu(pcysta->a2dp_ept.cnt_timeout)); + + seq_printf(m, ", avg_t:%d, max_t:%d", + le16_to_cpu(pcysta->a2dp_ept.tavg), + le16_to_cpu(pcysta->a2dp_ept.tmax)); + + seq_puts(m, "\n"); + } +} + static void _show_fbtc_nullsta(struct rtw89_dev *rtwdev, struct seq_file *m) { struct rtw89_btc *btc = &rtwdev->btc; @@ -7419,6 +7607,8 @@ static void _show_fw_dm_msg(struct rtw89_dev *rtwdev, struct seq_file *m) _show_fbtc_cysta_v3(rtwdev, m); else if (ver->fcxcysta == 4) _show_fbtc_cysta_v4(rtwdev, m); + else if (ver->fcxcysta == 5) + _show_fbtc_cysta_v5(rtwdev, m); _show_fbtc_nullsta(rtwdev, m); diff --git a/drivers/net/wireless/realtek/rtw89/coex.h b/drivers/net/wireless/realtek/rtw89/coex.h index 401fb55df82b0..38cc53a505c36 100644 --- a/drivers/net/wireless/realtek/rtw89/coex.h +++ b/drivers/net/wireless/realtek/rtw89/coex.h @@ -66,6 +66,11 @@ enum btc_rssi_st { BTC_RSSI_ST_MAX }; +enum btc_fddt_en { + BTC_FDDT_DISABLE, + BTC_FDDT_ENABLE, +}; + #define BTC_RSSI_HIGH(_rssi_) \ ({typeof(_rssi_) __rssi = (_rssi_); \ ((__rssi == BTC_RSSI_ST_HIGH || \ diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index 02fb3c73b1373..e447bfec508b8 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -1894,6 +1894,11 @@ struct rtw89_btc_fbtc_cycle_time_info { __le16 tmaxdiff[CXT_MAX]; /* max wl-wl bt-bt cycle diff time */ } __packed; +struct rtw89_btc_fbtc_cycle_time_info_v5 { + __le16 tavg[CXT_MAX]; /* avg wl/bt cycle time */ + __le16 tmax[CXT_MAX]; /* max wl/bt cycle time */ +} __packed; + struct rtw89_btc_fbtc_a2dp_trx_stat { u8 empty_cnt; u8 retry_cnt; @@ -1950,6 +1955,21 @@ struct rtw89_btc_fbtc_cycle_fddt_info { #define RTW89_BTC_FDDT_CELL_TRAIN_STATE GENMASK(3, 0) #define RTW89_BTC_FDDT_CELL_TRAIN_PHASE GENMASK(7, 4) +struct rtw89_btc_fbtc_cycle_fddt_info_v5 { + __le16 train_cycle; + __le16 tp; + + s8 tx_power; /* absolute Tx power (dBm), 0xff-> no BTC control */ + s8 bt_tx_power; /* decrease Tx power (dB) */ + s8 bt_rx_gain; /* LNA constrain level */ + u8 no_empty_cnt; + + u8 rssi; /* [7:4] -> bt_rssi_level, [3:0]-> wl_rssi_level */ + u8 cn; /* condition_num */ + u8 train_status; /* [7:4]-> train-state, [3:0]-> train-phase */ + u8 train_result; /* refer to enum btc_fddt_check_map */ +} __packed; + struct rtw89_btc_fbtc_fddt_cell_status { s8 wl_tx_pwr; s8 bt_tx_pwr; @@ -1957,6 +1977,12 @@ struct rtw89_btc_fbtc_fddt_cell_status { u8 state_phase; /* [0:3] train state, [4:7] train phase */ } __packed; +struct rtw89_btc_fbtc_fddt_cell_status_v5 { + s8 wl_tx_pwr; + s8 bt_tx_pwr; + s8 bt_rx_gain; +} __packed; + struct rtw89_btc_fbtc_cysta_v3 { /* statistics for cycles */ u8 fver; u8 rsvd; @@ -2002,10 +2028,35 @@ struct rtw89_btc_fbtc_cysta_v4 { /* statistics for cycles */ __le32 except_map; } __packed; +struct rtw89_btc_fbtc_cysta_v5 { /* statistics for cycles */ + u8 fver; + u8 rsvd; + u8 collision_cnt; 
/* counter for event/timer occur at the same time */ + u8 except_cnt; + u8 wl_rx_err_ratio[BTC_CYCLE_SLOT_MAX]; + + __le16 skip_cnt; + __le16 cycles; /* total cycle number */ + + __le16 slot_step_time[BTC_CYCLE_SLOT_MAX]; /* record the wl/bt slot time */ + __le16 slot_cnt[CXST_MAX]; /* slot count */ + __le16 bcn_cnt[CXBCN_MAX]; + struct rtw89_btc_fbtc_cycle_time_info_v5 cycle_time; + struct rtw89_btc_fbtc_cycle_leak_info leak_slot; + struct rtw89_btc_fbtc_cycle_a2dp_empty_info a2dp_ept; + struct rtw89_btc_fbtc_a2dp_trx_stat_v4 a2dp_trx[BTC_CYCLE_SLOT_MAX]; + struct rtw89_btc_fbtc_cycle_fddt_info_v5 fddt_trx[BTC_CYCLE_SLOT_MAX]; + struct rtw89_btc_fbtc_fddt_cell_status_v5 fddt_cells[FDD_TRAIN_WL_DIRECTION] + [FDD_TRAIN_WL_RSSI_LEVEL] + [FDD_TRAIN_BT_RSSI_LEVEL]; + __le32 except_map; +} __packed; + union rtw89_btc_fbtc_cysta_info { struct rtw89_btc_fbtc_cysta_v2 v2; struct rtw89_btc_fbtc_cysta_v3 v3; struct rtw89_btc_fbtc_cysta_v4 v4; + struct rtw89_btc_fbtc_cysta_v5 v5; }; struct rtw89_btc_fbtc_cynullsta_v1 { /* cycle null statistics */ @@ -2123,6 +2174,7 @@ struct rtw89_btc_dm { u32 wl_only: 1; u32 wl_fw_cx_offload: 1; u32 freerun: 1; + u32 fddt_train: 1; u32 wl_ps_ctrl: 2; u32 wl_mimo_ps: 1; u32 leak_ap: 1; -- GitLab From 48181d285623198c33bb9698992502687b258efa Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Thu, 9 Mar 2023 10:16:36 +0800 Subject: [PATCH 0379/2078] wifi: rtw88: fix memory leak in rtw_usb_probe() drivers/net/wireless/realtek/rtw88/usb.c:876 rtw_usb_probe() warn: 'hw' from ieee80211_alloc_hw() not released on lines: 811 Fix this by modifying return to a goto statement. Signed-off-by: Dongliang Mu Reviewed-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230309021636.528601-1-dzm91@hust.edu.cn --- drivers/net/wireless/realtek/rtw88/usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtw88/usb.c b/drivers/net/wireless/realtek/rtw88/usb.c index 2a8336b1847a5..68e1b782d1992 100644 --- a/drivers/net/wireless/realtek/rtw88/usb.c +++ b/drivers/net/wireless/realtek/rtw88/usb.c @@ -808,7 +808,7 @@ int rtw_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) ret = rtw_usb_alloc_rx_bufs(rtwusb); if (ret) - return ret; + goto err_release_hw; ret = rtw_core_init(rtwdev); if (ret) -- GitLab From 0da40e018fd034d87c9460123fa7f897b69fdee7 Mon Sep 17 00:00:00 2001 From: Jisoo Jang Date: Thu, 9 Mar 2023 19:44:57 +0900 Subject: [PATCH 0380/2078] wifi: brcmfmac: slab-out-of-bounds read in brcmf_get_assoc_ies() Fix a slab-out-of-bounds read that occurs in kmemdup() called from brcmf_get_assoc_ies(). The bug could occur when assoc_info->req_len, data from a URB provided by a USB device, is bigger than the size of buffer which is defined as WL_EXTRA_BUF_MAX. Add the size check for req_len/resp_len of assoc_info. Found by a modified version of syzkaller. 
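The heart of the fix is treating a length that arrives from the device as untrusted input: it must be validated against the size of the fixed staging buffer before it is used to size a copy. The standalone C sketch below shows that pattern only; the buffer size, names and error values are illustrative and do not come from the brcmfmac code.

#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define EXTRA_BUF_MAX 2048	/* stand-in for WL_EXTRA_BUF_MAX */

/* Copy "req_len" bytes of IEs out of a fixed-size staging buffer, but only
 * after validating the device-provided length against the buffer's real size.
 */
static int copy_assoc_ies(const uint8_t *extra_buf, uint32_t req_len,
			  uint8_t **out, uint32_t *out_len)
{
	uint8_t *p;

	if (req_len > EXTRA_BUF_MAX)
		return -EINVAL;	/* untrusted length: refuse to over-read */

	p = malloc(req_len);
	if (!p)
		return -ENOMEM;

	memcpy(p, extra_buf, req_len);	/* safe: req_len <= EXTRA_BUF_MAX */
	*out = p;
	*out_len = req_len;
	return 0;
}

int main(void)
{
	static uint8_t extra_buf[EXTRA_BUF_MAX];
	uint8_t *ies = NULL;
	uint32_t ies_len = 0;
	int rc;

	/* A hostile device could report a huge req_len; the check rejects it
	 * instead of reading far past the staging buffer.
	 */
	rc = copy_assoc_ies(extra_buf, 3014656, &ies, &ies_len);
	if (rc == 0)
		free(ies);
	return 0;
}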
[ 46.592467][ T7] ================================================================== [ 46.594687][ T7] BUG: KASAN: slab-out-of-bounds in kmemdup+0x3e/0x50 [ 46.596572][ T7] Read of size 3014656 at addr ffff888019442000 by task kworker/0:1/7 [ 46.598575][ T7] [ 46.599157][ T7] CPU: 0 PID: 7 Comm: kworker/0:1 Tainted: G O 5.14.0+ #145 [ 46.601333][ T7] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 [ 46.604360][ T7] Workqueue: events brcmf_fweh_event_worker [ 46.605943][ T7] Call Trace: [ 46.606584][ T7] dump_stack_lvl+0x8e/0xd1 [ 46.607446][ T7] print_address_description.constprop.0.cold+0x93/0x334 [ 46.608610][ T7] ? kmemdup+0x3e/0x50 [ 46.609341][ T7] kasan_report.cold+0x79/0xd5 [ 46.610151][ T7] ? kmemdup+0x3e/0x50 [ 46.610796][ T7] kasan_check_range+0x14e/0x1b0 [ 46.611691][ T7] memcpy+0x20/0x60 [ 46.612323][ T7] kmemdup+0x3e/0x50 [ 46.612987][ T7] brcmf_get_assoc_ies+0x967/0xf60 [ 46.613904][ T7] ? brcmf_notify_vif_event+0x3d0/0x3d0 [ 46.614831][ T7] ? lock_chain_count+0x20/0x20 [ 46.615683][ T7] ? mark_lock.part.0+0xfc/0x2770 [ 46.616552][ T7] ? lock_chain_count+0x20/0x20 [ 46.617409][ T7] ? mark_lock.part.0+0xfc/0x2770 [ 46.618244][ T7] ? lock_chain_count+0x20/0x20 [ 46.619024][ T7] brcmf_bss_connect_done.constprop.0+0x241/0x2e0 [ 46.620019][ T7] ? brcmf_parse_configure_security.isra.0+0x2a0/0x2a0 [ 46.620818][ T7] ? __lock_acquire+0x181f/0x5790 [ 46.621462][ T7] brcmf_notify_connect_status+0x448/0x1950 [ 46.622134][ T7] ? rcu_read_lock_bh_held+0xb0/0xb0 [ 46.622736][ T7] ? brcmf_cfg80211_join_ibss+0x7b0/0x7b0 [ 46.623390][ T7] ? find_held_lock+0x2d/0x110 [ 46.623962][ T7] ? brcmf_fweh_event_worker+0x19f/0xc60 [ 46.624603][ T7] ? mark_held_locks+0x9f/0xe0 [ 46.625145][ T7] ? lockdep_hardirqs_on_prepare+0x3e0/0x3e0 [ 46.625871][ T7] ? brcmf_cfg80211_join_ibss+0x7b0/0x7b0 [ 46.626545][ T7] brcmf_fweh_call_event_handler.isra.0+0x90/0x100 [ 46.627338][ T7] brcmf_fweh_event_worker+0x557/0xc60 [ 46.627962][ T7] ? brcmf_fweh_call_event_handler.isra.0+0x100/0x100 [ 46.628736][ T7] ? rcu_read_lock_sched_held+0xa1/0xd0 [ 46.629396][ T7] ? rcu_read_lock_bh_held+0xb0/0xb0 [ 46.629970][ T7] ? lockdep_hardirqs_on_prepare+0x273/0x3e0 [ 46.630649][ T7] process_one_work+0x92b/0x1460 [ 46.631205][ T7] ? pwq_dec_nr_in_flight+0x330/0x330 [ 46.631821][ T7] ? rwlock_bug.part.0+0x90/0x90 [ 46.632347][ T7] worker_thread+0x95/0xe00 [ 46.632832][ T7] ? __kthread_parkme+0x115/0x1e0 [ 46.633393][ T7] ? process_one_work+0x1460/0x1460 [ 46.633957][ T7] kthread+0x3a1/0x480 [ 46.634369][ T7] ? 
set_kthread_struct+0x120/0x120 [ 46.634933][ T7] ret_from_fork+0x1f/0x30 [ 46.635431][ T7] [ 46.635687][ T7] Allocated by task 7: [ 46.636151][ T7] kasan_save_stack+0x1b/0x40 [ 46.636628][ T7] __kasan_kmalloc+0x7c/0x90 [ 46.637108][ T7] kmem_cache_alloc_trace+0x19e/0x330 [ 46.637696][ T7] brcmf_cfg80211_attach+0x4a0/0x4040 [ 46.638275][ T7] brcmf_attach+0x389/0xd40 [ 46.638739][ T7] brcmf_usb_probe+0x12de/0x1690 [ 46.639279][ T7] usb_probe_interface+0x2aa/0x760 [ 46.639820][ T7] really_probe+0x205/0xb70 [ 46.640342][ T7] __driver_probe_device+0x311/0x4b0 [ 46.640876][ T7] driver_probe_device+0x4e/0x150 [ 46.641445][ T7] __device_attach_driver+0x1cc/0x2a0 [ 46.642000][ T7] bus_for_each_drv+0x156/0x1d0 [ 46.642543][ T7] __device_attach+0x23f/0x3a0 [ 46.643065][ T7] bus_probe_device+0x1da/0x290 [ 46.643644][ T7] device_add+0xb7b/0x1eb0 [ 46.644130][ T7] usb_set_configuration+0xf59/0x16f0 [ 46.644720][ T7] usb_generic_driver_probe+0x82/0xa0 [ 46.645295][ T7] usb_probe_device+0xbb/0x250 [ 46.645786][ T7] really_probe+0x205/0xb70 [ 46.646258][ T7] __driver_probe_device+0x311/0x4b0 [ 46.646804][ T7] driver_probe_device+0x4e/0x150 [ 46.647387][ T7] __device_attach_driver+0x1cc/0x2a0 [ 46.647926][ T7] bus_for_each_drv+0x156/0x1d0 [ 46.648454][ T7] __device_attach+0x23f/0x3a0 [ 46.648939][ T7] bus_probe_device+0x1da/0x290 [ 46.649478][ T7] device_add+0xb7b/0x1eb0 [ 46.649936][ T7] usb_new_device.cold+0x49c/0x1029 [ 46.650526][ T7] hub_event+0x1c98/0x3950 [ 46.650975][ T7] process_one_work+0x92b/0x1460 [ 46.651535][ T7] worker_thread+0x95/0xe00 [ 46.651991][ T7] kthread+0x3a1/0x480 [ 46.652413][ T7] ret_from_fork+0x1f/0x30 [ 46.652885][ T7] [ 46.653131][ T7] The buggy address belongs to the object at ffff888019442000 [ 46.653131][ T7] which belongs to the cache kmalloc-2k of size 2048 [ 46.654669][ T7] The buggy address is located 0 bytes inside of [ 46.654669][ T7] 2048-byte region [ffff888019442000, ffff888019442800) [ 46.656137][ T7] The buggy address belongs to the page: [ 46.656720][ T7] page:ffffea0000651000 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x19440 [ 46.657792][ T7] head:ffffea0000651000 order:3 compound_mapcount:0 compound_pincount:0 [ 46.658673][ T7] flags: 0x100000000010200(slab|head|node=0|zone=1) [ 46.659422][ T7] raw: 0100000000010200 0000000000000000 dead000000000122 ffff888100042000 [ 46.660363][ T7] raw: 0000000000000000 0000000000080008 00000001ffffffff 0000000000000000 [ 46.661236][ T7] page dumped because: kasan: bad access detected [ 46.661956][ T7] page_owner tracks the page as allocated [ 46.662588][ T7] page last allocated via order 3, migratetype Unmovable, gfp_mask 0x52a20(GFP_ATOMIC|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP), pid 7, ts 31136961085, free_ts 0 [ 46.664271][ T7] prep_new_page+0x1aa/0x240 [ 46.664763][ T7] get_page_from_freelist+0x159a/0x27c0 [ 46.665340][ T7] __alloc_pages+0x2da/0x6a0 [ 46.665847][ T7] alloc_pages+0xec/0x1e0 [ 46.666308][ T7] allocate_slab+0x380/0x4e0 [ 46.666770][ T7] ___slab_alloc+0x5bc/0x940 [ 46.667264][ T7] __slab_alloc+0x6d/0x80 [ 46.667712][ T7] kmem_cache_alloc_trace+0x30a/0x330 [ 46.668299][ T7] brcmf_usbdev_qinit.constprop.0+0x50/0x470 [ 46.668885][ T7] brcmf_usb_probe+0xc97/0x1690 [ 46.669438][ T7] usb_probe_interface+0x2aa/0x760 [ 46.669988][ T7] really_probe+0x205/0xb70 [ 46.670487][ T7] __driver_probe_device+0x311/0x4b0 [ 46.671031][ T7] driver_probe_device+0x4e/0x150 [ 46.671604][ T7] __device_attach_driver+0x1cc/0x2a0 [ 46.672192][ T7] bus_for_each_drv+0x156/0x1d0 [ 46.672739][ T7] page_owner free stack 
trace missing [ 46.673335][ T7] [ 46.673620][ T7] Memory state around the buggy address: [ 46.674213][ T7] ffff888019442700: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 46.675083][ T7] ffff888019442780: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 46.675994][ T7] >ffff888019442800: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 46.676875][ T7] ^ [ 46.677323][ T7] ffff888019442880: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 46.678190][ T7] ffff888019442900: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 46.679052][ T7] ================================================================== [ 46.679945][ T7] Disabling lock debugging due to kernel taint [ 46.680725][ T7] Kernel panic - not syncing: Reviewed-by: Arend van Spriel Signed-off-by: Jisoo Jang Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230309104457.22628-1-jisoo.jang@yonsei.ac.kr --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 548799fefb4bf..de8a2e27f49c7 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -6280,6 +6280,11 @@ static s32 brcmf_get_assoc_ies(struct brcmf_cfg80211_info *cfg, (struct brcmf_cfg80211_assoc_ielen_le *)cfg->extra_buf; req_len = le32_to_cpu(assoc_info->req_len); resp_len = le32_to_cpu(assoc_info->resp_len); + if (req_len > WL_EXTRA_BUF_MAX || resp_len > WL_EXTRA_BUF_MAX) { + bphy_err(drvr, "invalid lengths in assoc info: req %u resp %u\n", + req_len, resp_len); + return -EINVAL; + } if (req_len) { err = brcmf_fil_iovar_data_get(ifp, "assoc_req_ies", cfg->extra_buf, -- GitLab From 504ce971f260c178fa625f1278d9a762bc366504 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 22 Feb 2023 09:09:07 -0800 Subject: [PATCH 0381/2078] ice: re-order ice_mbx_reset_snapshot function A future change is going to refactor the VF mailbox overflow detection logic, including modifying ice_mbx_reset_snapshot and its callers. To make this change easier to review, first move the ice_mbx_reset_snapshot function higher in the ice_vf_mbx.c file. Signed-off-by: Jacob Keller Reviewed-by: Michal Swiatkowski Tested-by: Marek Szlosek Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_vf_mbx.c | 48 ++++++++++----------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_vf_mbx.c b/drivers/net/ethernet/intel/ice/ice_vf_mbx.c index f56fa94ff3d0d..2fe9a9504914e 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_mbx.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_mbx.c @@ -130,6 +130,30 @@ u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed) */ #define ICE_IGNORE_MAX_MSG_CNT 0xFFFF +/** + * ice_mbx_reset_snapshot - Reset mailbox snapshot structure + * @snap: pointer to mailbox snapshot structure in the ice_hw struct + * + * Reset the mailbox snapshot structure and clear VF counter array. + */ +static void ice_mbx_reset_snapshot(struct ice_mbx_snapshot *snap) +{ + u32 vfcntr_len; + + if (!snap || !snap->mbx_vf.vf_cntr) + return; + + /* Clear VF counters. */ + vfcntr_len = snap->mbx_vf.vfcntr_len; + if (vfcntr_len) + memset(snap->mbx_vf.vf_cntr, 0, + (vfcntr_len * sizeof(*snap->mbx_vf.vf_cntr))); + + /* Reset mailbox snapshot for a new capture. 
*/ + memset(&snap->mbx_buf, 0, sizeof(snap->mbx_buf)); + snap->mbx_buf.state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT; +} + /** * ice_mbx_traverse - Pass through mailbox snapshot * @hw: pointer to the HW struct @@ -201,30 +225,6 @@ ice_mbx_detect_malvf(struct ice_hw *hw, u16 vf_id, return 0; } -/** - * ice_mbx_reset_snapshot - Reset mailbox snapshot structure - * @snap: pointer to mailbox snapshot structure in the ice_hw struct - * - * Reset the mailbox snapshot structure and clear VF counter array. - */ -static void ice_mbx_reset_snapshot(struct ice_mbx_snapshot *snap) -{ - u32 vfcntr_len; - - if (!snap || !snap->mbx_vf.vf_cntr) - return; - - /* Clear VF counters. */ - vfcntr_len = snap->mbx_vf.vfcntr_len; - if (vfcntr_len) - memset(snap->mbx_vf.vf_cntr, 0, - (vfcntr_len * sizeof(*snap->mbx_vf.vf_cntr))); - - /* Reset mailbox snapshot for a new capture. */ - memset(&snap->mbx_buf, 0, sizeof(snap->mbx_buf)); - snap->mbx_buf.state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT; -} - /** * ice_mbx_vf_state_handler - Handle states of the overflow algorithm * @hw: pointer to the HW struct -- GitLab From 28756d9ec93e6588b1c3a00cc9123e238a71c709 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 22 Feb 2023 09:09:08 -0800 Subject: [PATCH 0382/2078] ice: convert ice_mbx_clear_malvf to void and use WARN The ice_mbx_clear_malvf function checks for a few error conditions before clearing the appropriate data. These error conditions are really warnings that should never occur in a properly initialized driver. Every caller of ice_mbx_clear_malvf just prints a dev_dbg message on failure which will generally be ignored. Convert this function to void and switch the error return values to WARN_ON. This will make any potentially misconfiguration more visible and makes future refactors that involve changing how we store the malicious VF data easier. 
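The pattern being adopted here is common for conditions that can only mean a driver bug: instead of returning an error code that callers merely log at debug level, the check trips a loud warning and the function simply bails out. The sketch below models that in standalone C with a local stand-in for the kernel's WARN_ON(); the structure and field names are simplified, not the ice driver's.

#include <stdio.h>

static int warn_on(int cond, const char *what)
{
	if (cond)
		fprintf(stderr, "WARNING: %s\n", what);
	return cond;
}

#define WARN_ON(cond) warn_on(!!(cond), #cond)

struct vf_cntr {
	unsigned int *cntr;
	unsigned int len;
};

/* The old style would return -EINVAL/-EIO here; the new style warns and
 * returns, because these conditions can only mean a programming error.
 */
static void clear_vf_cntr(struct vf_cntr *snap, unsigned int vf_id)
{
	if (WARN_ON(!snap || !snap->cntr))
		return;

	if (WARN_ON(vf_id >= snap->len))
		return;

	snap->cntr[vf_id] = 0;
}

int main(void)
{
	unsigned int cntr[4] = { 3, 1, 4, 1 };
	struct vf_cntr snap = { .cntr = cntr, .len = 4 };

	clear_vf_cntr(&snap, 2);	/* valid index: counter cleared */
	clear_vf_cntr(&snap, 9);	/* out of range: warns, does nothing */
	return 0;
}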
Signed-off-by: Jacob Keller Reviewed-by: Michal Swiatkowski Tested-by: Marek Szlosek Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_sriov.c | 6 ++---- drivers/net/ethernet/intel/ice/ice_vf_lib.c | 12 ++++-------- drivers/net/ethernet/intel/ice/ice_vf_mbx.c | 16 +++++++--------- drivers/net/ethernet/intel/ice/ice_vf_mbx.h | 2 +- 4 files changed, 14 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 96a64c25e2ef7..7107c279752ac 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -204,10 +204,8 @@ void ice_free_vfs(struct ice_pf *pf) } /* clear malicious info since the VF is getting released */ - if (ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->vfs.malvfs, - ICE_MAX_SRIOV_VFS, vf->vf_id)) - dev_dbg(dev, "failed to clear malicious VF state for VF %u\n", - vf->vf_id); + ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->vfs.malvfs, + ICE_MAX_SRIOV_VFS, vf->vf_id); mutex_unlock(&vf->cfg_lock); } diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c index 0e57bd1b85fd4..116b43588389d 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c @@ -496,10 +496,8 @@ void ice_reset_all_vfs(struct ice_pf *pf) /* clear all malicious info if the VFs are getting reset */ ice_for_each_vf(pf, bkt, vf) - if (ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->vfs.malvfs, - ICE_MAX_SRIOV_VFS, vf->vf_id)) - dev_dbg(dev, "failed to clear malicious VF state for VF %u\n", - vf->vf_id); + ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->vfs.malvfs, + ICE_MAX_SRIOV_VFS, vf->vf_id); /* If VFs have been disabled, there is no need to reset */ if (test_and_set_bit(ICE_VF_DIS, pf->state)) { @@ -705,10 +703,8 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags) ice_eswitch_replay_vf_mac_rule(vf); /* if the VF has been reset allow it to come up again */ - if (ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->vfs.malvfs, - ICE_MAX_SRIOV_VFS, vf->vf_id)) - dev_dbg(dev, "failed to clear malicious VF state for VF %u\n", - vf->vf_id); + ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->vfs.malvfs, + ICE_MAX_SRIOV_VFS, vf->vf_id); out_unlock: if (flags & ICE_VF_RESET_LOCK) diff --git a/drivers/net/ethernet/intel/ice/ice_vf_mbx.c b/drivers/net/ethernet/intel/ice/ice_vf_mbx.c index 2fe9a9504914e..9f6acfeb0fc6b 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_mbx.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_mbx.c @@ -392,19 +392,19 @@ ice_mbx_report_malvf(struct ice_hw *hw, unsigned long *all_malvfs, * that the new VF loaded is not considered malicious before going * through the overflow detection algorithm. 
*/ -int +void ice_mbx_clear_malvf(struct ice_mbx_snapshot *snap, unsigned long *all_malvfs, u16 bitmap_len, u16 vf_id) { - if (!snap || !all_malvfs) - return -EINVAL; + if (WARN_ON(!snap || !all_malvfs)) + return; - if (bitmap_len < snap->mbx_vf.vfcntr_len) - return -EINVAL; + if (WARN_ON(bitmap_len < snap->mbx_vf.vfcntr_len)) + return; /* Ensure VF ID value is not larger than bitmap or VF counter length */ - if (vf_id >= bitmap_len || vf_id >= snap->mbx_vf.vfcntr_len) - return -EIO; + if (WARN_ON(vf_id >= bitmap_len || vf_id >= snap->mbx_vf.vfcntr_len)) + return; /* Clear VF ID bit in the bitmap tracking malicious VFs attached to PF */ clear_bit(vf_id, all_malvfs); @@ -416,8 +416,6 @@ ice_mbx_clear_malvf(struct ice_mbx_snapshot *snap, unsigned long *all_malvfs, * values in the mailbox overflow detection algorithm. */ snap->mbx_vf.vf_cntr[vf_id] = 0; - - return 0; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_vf_mbx.h b/drivers/net/ethernet/intel/ice/ice_vf_mbx.h index 582716e6d5f98..be593b9516428 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_mbx.h +++ b/drivers/net/ethernet/intel/ice/ice_vf_mbx.h @@ -22,7 +22,7 @@ u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed); int ice_mbx_vf_state_handler(struct ice_hw *hw, struct ice_mbx_data *mbx_data, u16 vf_id, bool *is_mal_vf); -int +void ice_mbx_clear_malvf(struct ice_mbx_snapshot *snap, unsigned long *all_malvfs, u16 bitmap_len, u16 vf_id); int ice_mbx_init_snapshot(struct ice_hw *hw, u16 vf_count); -- GitLab From e4eaf8938852d092fa447b32adb8ec233621d86a Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 22 Feb 2023 09:09:09 -0800 Subject: [PATCH 0383/2078] ice: track malicious VFs in new ice_mbx_vf_info structure Currently the PF tracks malicious VFs in a malvfs bitmap which is used by the ice_mbx_clear_malvf and ice_mbx_report_malvf functions. This bitmap is used to ensure that we only report a VF as malicious once rather than continuously spamming the event log. This mechanism of storage for the malicious indication works well enough for SR-IOV. However, it will not work with Scalable IOV. This is because Scalable IOV VFs can be allocated dynamically and might change VF ID when their underlying VSI changes. To support this, the mailbox overflow logic will need to be refactored. First, introduce a new ice_mbx_vf_info structure which will be used to store data about a VF. Embed this structure in the struct ice_vf, and ensure it gets initialized when a new VF is created. For now this only stores the malicious indicator bit. Pass a pointer to the VF's mbx_info structure instead of using a bitmap to keep track of these bits. A future change will extend this structure and the rest of the logic associated with the overflow detection. 
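The shape of the refactor: a PF-global bitmap indexed by VF ID becomes a small tracking struct embedded in each VF object, so the mailbox code only needs a pointer to the VF's own state and stops caring about the numeric ID. A minimal standalone C sketch of that shape follows; the names are hypothetical, not the ice driver's.

#include <stdbool.h>
#include <stdio.h>

/* Per-VF mailbox tracking state, embedded in the VF object itself. */
struct mbx_vf_info {
	bool malicious;
};

struct vf {
	unsigned int vf_id;
	struct mbx_vf_info mbx_info;
};

/* Report a VF as malicious at most once between resets: the first call
 * returns true (caller should log), later calls return false.
 */
static bool mbx_report_malvf(struct mbx_vf_info *info)
{
	bool report = !info->malicious;

	info->malicious = true;
	return report;
}

/* On VF reset the per-VF state is simply cleared; there is no global bitmap
 * to keep in sync with the VF's (possibly changing) ID.
 */
static void mbx_clear_malvf(struct mbx_vf_info *info)
{
	info->malicious = false;
}

int main(void)
{
	struct vf vf = { .vf_id = 0 };

	printf("first detection reported: %d\n", mbx_report_malvf(&vf.mbx_info));
	printf("second detection reported: %d\n", mbx_report_malvf(&vf.mbx_info));
	mbx_clear_malvf(&vf.mbx_info);
	printf("after reset reported: %d\n", mbx_report_malvf(&vf.mbx_info));
	return 0;
}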
Signed-off-by: Jacob Keller Reviewed-by: Michal Swiatkowski Tested-by: Marek Szlosek Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_sriov.c | 7 +- drivers/net/ethernet/intel/ice/ice_type.h | 7 ++ drivers/net/ethernet/intel/ice/ice_vf_lib.c | 10 +-- drivers/net/ethernet/intel/ice/ice_vf_lib.h | 2 +- drivers/net/ethernet/intel/ice/ice_vf_mbx.c | 71 +++++++++------------ drivers/net/ethernet/intel/ice/ice_vf_mbx.h | 9 +-- 6 files changed, 53 insertions(+), 53 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 7107c279752ac..44b94276df91f 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -204,8 +204,8 @@ void ice_free_vfs(struct ice_pf *pf) } /* clear malicious info since the VF is getting released */ - ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->vfs.malvfs, - ICE_MAX_SRIOV_VFS, vf->vf_id); + ice_mbx_clear_malvf(&hw->mbx_snapshot, vf->vf_id, + &vf->mbx_info); mutex_unlock(&vf->cfg_lock); } @@ -1828,8 +1828,7 @@ ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event, /* if the VF is malicious and we haven't let the user * know about it, then let them know now */ - status = ice_mbx_report_malvf(&pf->hw, pf->vfs.malvfs, - ICE_MAX_SRIOV_VFS, vf_id, + status = ice_mbx_report_malvf(&pf->hw, &vf->mbx_info, &report_vf); if (status) dev_dbg(dev, "Error reporting malicious VF\n"); diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index e3f622cad4255..d243a0c59ea4d 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -794,6 +794,13 @@ struct ice_mbx_vf_counter { u32 vfcntr_len; }; +/* Structure used to track a single VF's messages on the mailbox: + * 1. malicious: whether this VF has been detected as malicious before + */ +struct ice_mbx_vf_info { + u8 malicious : 1; +}; + /* Structure to hold data relevant to the captured static snapshot * of the PF-VF mailbox. 
*/ diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c index 116b43588389d..69e89e960950a 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c @@ -496,8 +496,8 @@ void ice_reset_all_vfs(struct ice_pf *pf) /* clear all malicious info if the VFs are getting reset */ ice_for_each_vf(pf, bkt, vf) - ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->vfs.malvfs, - ICE_MAX_SRIOV_VFS, vf->vf_id); + ice_mbx_clear_malvf(&hw->mbx_snapshot, vf->vf_id, + &vf->mbx_info); /* If VFs have been disabled, there is no need to reset */ if (test_and_set_bit(ICE_VF_DIS, pf->state)) { @@ -703,8 +703,7 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags) ice_eswitch_replay_vf_mac_rule(vf); /* if the VF has been reset allow it to come up again */ - ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->vfs.malvfs, - ICE_MAX_SRIOV_VFS, vf->vf_id); + ice_mbx_clear_malvf(&hw->mbx_snapshot, vf->vf_id, &vf->mbx_info); out_unlock: if (flags & ICE_VF_RESET_LOCK) @@ -760,6 +759,9 @@ void ice_initialize_vf_entry(struct ice_vf *vf) ice_vf_ctrl_invalidate_vsi(vf); ice_vf_fdir_init(vf); + /* Initialize mailbox info for this VF */ + ice_mbx_init_vf_info(&pf->hw, &vf->mbx_info); + mutex_init(&vf->cfg_lock); } diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.h b/drivers/net/ethernet/intel/ice/ice_vf_lib.h index ef30f05b5d02e..e3cda6fb71ab1 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.h @@ -74,7 +74,6 @@ struct ice_vfs { u16 num_qps_per; /* number of queue pairs per VF */ u16 num_msix_per; /* number of MSI-X vectors per VF */ unsigned long last_printed_mdd_jiffies; /* MDD message rate limit */ - DECLARE_BITMAP(malvfs, ICE_MAX_SRIOV_VFS); /* malicious VF indicator */ }; /* VF information structure */ @@ -105,6 +104,7 @@ struct ice_vf { DECLARE_BITMAP(rxq_ena, ICE_MAX_RSS_QS_PER_VF); struct ice_vlan port_vlan_info; /* Port VLAN ID, QoS, and TPID */ struct virtchnl_vlan_caps vlan_v2_caps; + struct ice_mbx_vf_info mbx_info; u8 pf_set_mac:1; /* VF MAC address set by VMM admin */ u8 trusted:1; u8 spoofchk:1; diff --git a/drivers/net/ethernet/intel/ice/ice_vf_mbx.c b/drivers/net/ethernet/intel/ice/ice_vf_mbx.c index 9f6acfeb0fc6b..2e769bd0bf7ee 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_mbx.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_mbx.c @@ -345,35 +345,23 @@ ice_mbx_vf_state_handler(struct ice_hw *hw, /** * ice_mbx_report_malvf - Track and note malicious VF * @hw: pointer to the HW struct - * @all_malvfs: all malicious VFs tracked by PF - * @bitmap_len: length of bitmap in bits - * @vf_id: relative virtual function ID of the malicious VF + * @vf_info: the mailbox tracking info structure for a VF * @report_malvf: boolean to indicate if malicious VF must be reported * - * This function will update a bitmap that keeps track of the malicious - * VFs attached to the PF. A malicious VF must be reported only once if - * discovered between VF resets or loading so the function checks - * the input vf_id against the bitmap to verify if the VF has been - * detected in any previous mailbox iterations. + * This function updates the malicious indicator bit in the VF mailbox + * tracking structure. A malicious VF must be reported only once if discovered + * between VF resets or loading so the function first checks if the VF has + * already been detected in any previous mailbox iterations. 
*/ int -ice_mbx_report_malvf(struct ice_hw *hw, unsigned long *all_malvfs, - u16 bitmap_len, u16 vf_id, bool *report_malvf) +ice_mbx_report_malvf(struct ice_hw *hw, struct ice_mbx_vf_info *vf_info, + bool *report_malvf) { - if (!all_malvfs || !report_malvf) - return -EINVAL; - - *report_malvf = false; - - if (bitmap_len < hw->mbx_snapshot.mbx_vf.vfcntr_len) + if (!report_malvf) return -EINVAL; - if (vf_id >= bitmap_len) - return -EIO; - - /* If the vf_id is found in the bitmap set bit and boolean to true */ - if (!test_and_set_bit(vf_id, all_malvfs)) - *report_malvf = true; + *report_malvf = !vf_info->malicious; + vf_info->malicious = 1; return 0; } @@ -381,33 +369,24 @@ ice_mbx_report_malvf(struct ice_hw *hw, unsigned long *all_malvfs, /** * ice_mbx_clear_malvf - Clear VF bitmap and counter for VF ID * @snap: pointer to the mailbox snapshot structure - * @all_malvfs: all malicious VFs tracked by PF - * @bitmap_len: length of bitmap in bits * @vf_id: relative virtual function ID of the malicious VF + * @vf_info: mailbox tracking structure for this VF * - * In case of a VF reset, this function can be called to clear - * the bit corresponding to the VF ID in the bitmap tracking all - * malicious VFs attached to the PF. The function also clears the - * VF counter array at the index of the VF ID. This is to ensure - * that the new VF loaded is not considered malicious before going - * through the overflow detection algorithm. - */ +* In case of a VF reset, this function shall be called to clear the VF's +* current mailbox tracking state. +*/ void -ice_mbx_clear_malvf(struct ice_mbx_snapshot *snap, unsigned long *all_malvfs, - u16 bitmap_len, u16 vf_id) +ice_mbx_clear_malvf(struct ice_mbx_snapshot *snap, u16 vf_id, + struct ice_mbx_vf_info *vf_info) { - if (WARN_ON(!snap || !all_malvfs)) - return; - - if (WARN_ON(bitmap_len < snap->mbx_vf.vfcntr_len)) + if (WARN_ON(!snap)) return; /* Ensure VF ID value is not larger than bitmap or VF counter length */ - if (WARN_ON(vf_id >= bitmap_len || vf_id >= snap->mbx_vf.vfcntr_len)) + if (WARN_ON(vf_id >= snap->mbx_vf.vfcntr_len)) return; - /* Clear VF ID bit in the bitmap tracking malicious VFs attached to PF */ - clear_bit(vf_id, all_malvfs); + vf_info->malicious = 0; /* Clear the VF counter in the mailbox snapshot structure for that VF ID. * This is to ensure that if a VF is unloaded and a new one brought back @@ -418,6 +397,18 @@ ice_mbx_clear_malvf(struct ice_mbx_snapshot *snap, unsigned long *all_malvfs, snap->mbx_vf.vf_cntr[vf_id] = 0; } +/** + * ice_mbx_init_vf_info - Initialize a new VF mailbox tracking info + * @hw: pointer to the hardware structure + * @vf_info: the mailbox tracking info structure for a VF + * + * Initialize a VF mailbox tracking info structure. 
+ */ +void ice_mbx_init_vf_info(struct ice_hw *hw, struct ice_mbx_vf_info *vf_info) +{ + vf_info->malicious = 0; +} + /** * ice_mbx_init_snapshot - Initialize mailbox snapshot structure * @hw: pointer to the hardware structure diff --git a/drivers/net/ethernet/intel/ice/ice_vf_mbx.h b/drivers/net/ethernet/intel/ice/ice_vf_mbx.h index be593b9516428..2613cba61ac7d 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_mbx.h +++ b/drivers/net/ethernet/intel/ice/ice_vf_mbx.h @@ -23,13 +23,14 @@ int ice_mbx_vf_state_handler(struct ice_hw *hw, struct ice_mbx_data *mbx_data, u16 vf_id, bool *is_mal_vf); void -ice_mbx_clear_malvf(struct ice_mbx_snapshot *snap, unsigned long *all_malvfs, - u16 bitmap_len, u16 vf_id); +ice_mbx_clear_malvf(struct ice_mbx_snapshot *snap, u16 vf_id, + struct ice_mbx_vf_info *vf_info); +void ice_mbx_init_vf_info(struct ice_hw *hw, struct ice_mbx_vf_info *vf_info); int ice_mbx_init_snapshot(struct ice_hw *hw, u16 vf_count); void ice_mbx_deinit_snapshot(struct ice_hw *hw); int -ice_mbx_report_malvf(struct ice_hw *hw, unsigned long *all_malvfs, - u16 bitmap_len, u16 vf_id, bool *report_malvf); +ice_mbx_report_malvf(struct ice_hw *hw, struct ice_mbx_vf_info *vf_info, + bool *report_malvf); #else /* CONFIG_PCI_IOV */ static inline int ice_aq_send_msg_to_vf(struct ice_hw __always_unused *hw, -- GitLab From 8cd8a6b17d275a45e3722d0215f6115b687c8c3e Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 22 Feb 2023 09:09:10 -0800 Subject: [PATCH 0384/2078] ice: move VF overflow message count into struct ice_mbx_vf_info The ice driver has some logic in ice_vf_mbx.c used to detect potentially malicious VF behavior with regards to overflowing the PF mailbox. This logic currently stores message counts in struct ice_mbx_vf_counter.vf_cntr as an array. This array is allocated during initialization with ice_mbx_init_snapshot. This logic makes sense for SR-IOV where all VFs are allocated at once up front. However, in the future with Scalable IOV this logic will not work. VFs can be added and removed dynamically. We could try to keep the vf_cntr array for the maximum possible number of VFs, but this is a waste of memory. Use the recently introduced struct ice_mbx_vf_info structure to store the message count. Pass a pointer to the mbx_info for a VF instead of using its VF ID. Replace the array of VF message counts with a linked list that tracks all currently active mailbox tracking info structures. 
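Because Scalable IOV VFs can come and go, the per-VF counter also moves into the per-VF tracking struct, and the snapshot keeps a list of whichever tracking structures are currently registered; resetting the counters becomes a list walk rather than a wipe of an array sized for the maximum VF count. Below is a standalone C sketch of that idea using a hand-rolled singly linked list; the driver itself uses the kernel's list_head, and all names here are illustrative.

#include <stdio.h>

/* Per-VF mailbox tracking info, linked into the snapshot's list while the
 * VF exists and unlinked when the VF is freed.
 */
struct mbx_vf_info {
	struct mbx_vf_info *next;
	unsigned int msg_count;
};

struct mbx_snapshot {
	struct mbx_vf_info *vf_list;	/* head of currently active VFs */
};

static void snapshot_add_vf(struct mbx_snapshot *snap, struct mbx_vf_info *info)
{
	info->msg_count = 0;
	info->next = snap->vf_list;
	snap->vf_list = info;
}

/* Reset every active VF's counter: a walk over whatever is on the list,
 * instead of clearing a fixed-size array.
 */
static void snapshot_reset(struct mbx_snapshot *snap)
{
	struct mbx_vf_info *info;

	for (info = snap->vf_list; info; info = info->next)
		info->msg_count = 0;
}

int main(void)
{
	struct mbx_snapshot snap = { .vf_list = NULL };
	struct mbx_vf_info a, b;

	snapshot_add_vf(&snap, &a);
	snapshot_add_vf(&snap, &b);
	a.msg_count = 5;
	b.msg_count = 7;

	snapshot_reset(&snap);
	printf("after reset: a=%u b=%u\n", a.msg_count, b.msg_count);
	return 0;
}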
Signed-off-by: Jacob Keller Reviewed-by: Michal Swiatkowski Tested-by: Marek Szlosek Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_sriov.c | 9 +- drivers/net/ethernet/intel/ice/ice_type.h | 18 +-- drivers/net/ethernet/intel/ice/ice_vf_lib.c | 7 +- drivers/net/ethernet/intel/ice/ice_vf_mbx.c | 167 +++++++------------- drivers/net/ethernet/intel/ice/ice_vf_mbx.h | 8 +- 5 files changed, 69 insertions(+), 140 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 44b94276df91f..8820f269bfdf4 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -204,8 +204,7 @@ void ice_free_vfs(struct ice_pf *pf) } /* clear malicious info since the VF is getting released */ - ice_mbx_clear_malvf(&hw->mbx_snapshot, vf->vf_id, - &vf->mbx_info); + list_del(&vf->mbx_info.list_entry); mutex_unlock(&vf->cfg_lock); } @@ -1025,9 +1024,7 @@ int ice_sriov_configure(struct pci_dev *pdev, int num_vfs) return -EBUSY; } - err = ice_mbx_init_snapshot(&pf->hw, num_vfs); - if (err) - return err; + ice_mbx_init_snapshot(&pf->hw); err = ice_pci_sriov_ena(pf, num_vfs); if (err) { @@ -1818,7 +1815,7 @@ ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event, mbxdata.async_watermark_val = ICE_MBX_OVERFLOW_WATERMARK; /* check to see if we have a malicious VF */ - status = ice_mbx_vf_state_handler(&pf->hw, &mbxdata, vf_id, &malvf); + status = ice_mbx_vf_state_handler(&pf->hw, &mbxdata, &vf->mbx_info, &malvf); if (status) goto out_put_vf; diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index d243a0c59ea4d..a09556e57803d 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -784,20 +784,14 @@ struct ice_mbx_snap_buffer_data { u16 max_num_msgs_mbx; }; -/* Structure to track messages sent by VFs on mailbox: - * 1. vf_cntr: a counter array of VFs to track the number of - * asynchronous messages sent by each VF - * 2. vfcntr_len: number of entries in VF counter array - */ -struct ice_mbx_vf_counter { - u32 *vf_cntr; - u32 vfcntr_len; -}; - /* Structure used to track a single VF's messages on the mailbox: - * 1. malicious: whether this VF has been detected as malicious before + * 1. list_entry: linked list entry node + * 2. msg_count: the number of asynchronous messages sent by this VF + * 3. 
malicious: whether this VF has been detected as malicious before */ struct ice_mbx_vf_info { + struct list_head list_entry; + u32 msg_count; u8 malicious : 1; }; @@ -806,7 +800,7 @@ struct ice_mbx_vf_info { */ struct ice_mbx_snapshot { struct ice_mbx_snap_buffer_data mbx_buf; - struct ice_mbx_vf_counter mbx_vf; + struct list_head mbx_vf; }; /* Structure to hold data to be used for capturing or updating a diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c index 69e89e960950a..89fd6982df093 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c @@ -496,8 +496,7 @@ void ice_reset_all_vfs(struct ice_pf *pf) /* clear all malicious info if the VFs are getting reset */ ice_for_each_vf(pf, bkt, vf) - ice_mbx_clear_malvf(&hw->mbx_snapshot, vf->vf_id, - &vf->mbx_info); + ice_mbx_clear_malvf(&vf->mbx_info); /* If VFs have been disabled, there is no need to reset */ if (test_and_set_bit(ICE_VF_DIS, pf->state)) { @@ -599,12 +598,10 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags) struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; struct device *dev; - struct ice_hw *hw; int err = 0; bool rsd; dev = ice_pf_to_dev(pf); - hw = &pf->hw; if (flags & ICE_VF_RESET_NOTIFY) ice_notify_vf_reset(vf); @@ -703,7 +700,7 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags) ice_eswitch_replay_vf_mac_rule(vf); /* if the VF has been reset allow it to come up again */ - ice_mbx_clear_malvf(&hw->mbx_snapshot, vf->vf_id, &vf->mbx_info); + ice_mbx_clear_malvf(&vf->mbx_info); out_unlock: if (flags & ICE_VF_RESET_LOCK) diff --git a/drivers/net/ethernet/intel/ice/ice_vf_mbx.c b/drivers/net/ethernet/intel/ice/ice_vf_mbx.c index 2e769bd0bf7ee..4bfed5fb3a885 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_mbx.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_mbx.c @@ -93,36 +93,31 @@ u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed) * * 2. When the caller starts processing its mailbox queue in response to an * interrupt, the structure ice_mbx_snapshot is expected to be cleared before - * the algorithm can be run for the first time for that interrupt. This can be - * done via ice_mbx_reset_snapshot(). + * the algorithm can be run for the first time for that interrupt. This + * requires calling ice_mbx_reset_snapshot() as well as calling + * ice_mbx_reset_vf_info() for each VF tracking structure. * * 3. For every message read by the caller from the MBX Queue, the caller must * call the detection algorithm's entry function ice_mbx_vf_state_handler(). * Before every call to ice_mbx_vf_state_handler() the struct ice_mbx_data is * filled as it is required to be passed to the algorithm. * - * 4. Every time a message is read from the MBX queue, a VFId is received which - * is passed to the state handler. The boolean output is_malvf of the state - * handler ice_mbx_vf_state_handler() serves as an indicator to the caller - * whether this VF is malicious or not. + * 4. Every time a message is read from the MBX queue, a tracking structure + * for the VF must be passed to the state handler. The boolean output + * report_malvf from ice_mbx_vf_state_handler() serves as an indicator to the + * caller whether it must report this VF as malicious or not. * * 5. When a VF is identified to be malicious, the caller can send a message - * to the system administrator. The caller can invoke ice_mbx_report_malvf() - * to help determine if a malicious VF is to be reported or not. 
This function - * requires the caller to maintain a global bitmap to track all malicious VFs - * and pass that to ice_mbx_report_malvf() along with the VFID which was identified - * to be malicious by ice_mbx_vf_state_handler(). + * to the system administrator. * - * 6. The global bitmap maintained by PF can be cleared completely if PF is in - * reset or the bit corresponding to a VF can be cleared if that VF is in reset. - * When a VF is shut down and brought back up, we assume that the new VF - * brought up is not malicious and hence report it if found malicious. + * 6. The PF is responsible for maintaining the struct ice_mbx_vf_info + * structure for each VF. The PF should clear the VF tracking structure if the + * VF is reset. When a VF is shut down and brought back up, we will then + * assume that the new VF is not malicious and may report it again if we + * detect it again. * * 7. The function ice_mbx_reset_snapshot() is called to reset the information * in ice_mbx_snapshot for every new mailbox interrupt handled. - * - * 8. The memory allocated for variables in ice_mbx_snapshot is de-allocated - * when driver is unloaded. */ #define ICE_RQ_DATA_MASK(rq_data) ((rq_data) & PF_MBX_ARQH_ARQH_M) /* Using the highest value for an unsigned 16-bit value 0xFFFF to indicate that @@ -132,26 +127,21 @@ u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed) /** * ice_mbx_reset_snapshot - Reset mailbox snapshot structure - * @snap: pointer to mailbox snapshot structure in the ice_hw struct - * - * Reset the mailbox snapshot structure and clear VF counter array. + * @snap: pointer to the mailbox snapshot */ static void ice_mbx_reset_snapshot(struct ice_mbx_snapshot *snap) { - u32 vfcntr_len; - - if (!snap || !snap->mbx_vf.vf_cntr) - return; + struct ice_mbx_vf_info *vf_info; - /* Clear VF counters. */ - vfcntr_len = snap->mbx_vf.vfcntr_len; - if (vfcntr_len) - memset(snap->mbx_vf.vf_cntr, 0, - (vfcntr_len * sizeof(*snap->mbx_vf.vf_cntr))); - - /* Reset mailbox snapshot for a new capture. */ + /* Clear mbx_buf in the mailbox snaphot structure and setting the + * mailbox snapshot state to a new capture. + */ memset(&snap->mbx_buf, 0, sizeof(snap->mbx_buf)); snap->mbx_buf.state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT; + + /* Reset message counts for all VFs to zero */ + list_for_each_entry(vf_info, &snap->mbx_vf, list_entry) + vf_info->msg_count = 0; } /** @@ -195,7 +185,7 @@ ice_mbx_traverse(struct ice_hw *hw, /** * ice_mbx_detect_malvf - Detect malicious VF in snapshot * @hw: pointer to the HW struct - * @vf_id: relative virtual function ID + * @vf_info: mailbox tracking structure for a VF * @new_state: new algorithm state * @is_malvf: boolean output to indicate if VF is malicious * @@ -204,19 +194,14 @@ ice_mbx_traverse(struct ice_hw *hw, * the permissible number of messages to send. 
*/ static int -ice_mbx_detect_malvf(struct ice_hw *hw, u16 vf_id, +ice_mbx_detect_malvf(struct ice_hw *hw, struct ice_mbx_vf_info *vf_info, enum ice_mbx_snapshot_state *new_state, bool *is_malvf) { - struct ice_mbx_snapshot *snap = &hw->mbx_snapshot; + /* increment the message count for this VF */ + vf_info->msg_count++; - if (vf_id >= snap->mbx_vf.vfcntr_len) - return -EIO; - - /* increment the message count in the VF array */ - snap->mbx_vf.vf_cntr[vf_id]++; - - if (snap->mbx_vf.vf_cntr[vf_id] >= ICE_ASYNC_VF_MSG_THRESHOLD) + if (vf_info->msg_count >= ICE_ASYNC_VF_MSG_THRESHOLD) *is_malvf = true; /* continue to iterate through the mailbox snapshot */ @@ -229,7 +214,7 @@ ice_mbx_detect_malvf(struct ice_hw *hw, u16 vf_id, * ice_mbx_vf_state_handler - Handle states of the overflow algorithm * @hw: pointer to the HW struct * @mbx_data: pointer to structure containing mailbox data - * @vf_id: relative virtual function (VF) ID + * @vf_info: mailbox tracking structure for the VF in question * @is_malvf: boolean output to indicate if VF is malicious * * The function serves as an entry point for the malicious VF @@ -250,7 +235,8 @@ ice_mbx_detect_malvf(struct ice_hw *hw, u16 vf_id, */ int ice_mbx_vf_state_handler(struct ice_hw *hw, - struct ice_mbx_data *mbx_data, u16 vf_id, + struct ice_mbx_data *mbx_data, + struct ice_mbx_vf_info *vf_info, bool *is_malvf) { struct ice_mbx_snapshot *snap = &hw->mbx_snapshot; @@ -315,7 +301,8 @@ ice_mbx_vf_state_handler(struct ice_hw *hw, if (snap_buf->num_pending_arq >= mbx_data->async_watermark_val) { new_state = ICE_MAL_VF_DETECT_STATE_DETECT; - status = ice_mbx_detect_malvf(hw, vf_id, &new_state, is_malvf); + status = ice_mbx_detect_malvf(hw, vf_info, &new_state, + is_malvf); } else { new_state = ICE_MAL_VF_DETECT_STATE_TRAVERSE; ice_mbx_traverse(hw, &new_state); @@ -329,7 +316,8 @@ ice_mbx_vf_state_handler(struct ice_hw *hw, case ICE_MAL_VF_DETECT_STATE_DETECT: new_state = ICE_MAL_VF_DETECT_STATE_DETECT; - status = ice_mbx_detect_malvf(hw, vf_id, &new_state, is_malvf); + status = ice_mbx_detect_malvf(hw, vf_info, &new_state, + is_malvf); break; default: @@ -367,34 +355,16 @@ ice_mbx_report_malvf(struct ice_hw *hw, struct ice_mbx_vf_info *vf_info, } /** - * ice_mbx_clear_malvf - Clear VF bitmap and counter for VF ID - * @snap: pointer to the mailbox snapshot structure - * @vf_id: relative virtual function ID of the malicious VF - * @vf_info: mailbox tracking structure for this VF + * ice_mbx_clear_malvf - Clear VF mailbox info + * @vf_info: the mailbox tracking structure for a VF * -* In case of a VF reset, this function shall be called to clear the VF's -* current mailbox tracking state. -*/ -void -ice_mbx_clear_malvf(struct ice_mbx_snapshot *snap, u16 vf_id, - struct ice_mbx_vf_info *vf_info) + * In case of a VF reset, this function shall be called to clear the VF's + * current mailbox tracking state. + */ +void ice_mbx_clear_malvf(struct ice_mbx_vf_info *vf_info) { - if (WARN_ON(!snap)) - return; - - /* Ensure VF ID value is not larger than bitmap or VF counter length */ - if (WARN_ON(vf_id >= snap->mbx_vf.vfcntr_len)) - return; - vf_info->malicious = 0; - - /* Clear the VF counter in the mailbox snapshot structure for that VF ID. - * This is to ensure that if a VF is unloaded and a new one brought back - * up with the same VF ID for a snapshot currently in traversal or detect - * state the counter for that VF ID does not increment on top of existing - * values in the mailbox overflow detection algorithm. 
- */ - snap->mbx_vf.vf_cntr[vf_id] = 0; + vf_info->msg_count = 0; } /** @@ -402,55 +372,32 @@ ice_mbx_clear_malvf(struct ice_mbx_snapshot *snap, u16 vf_id, * @hw: pointer to the hardware structure * @vf_info: the mailbox tracking info structure for a VF * - * Initialize a VF mailbox tracking info structure. + * Initialize a VF mailbox tracking info structure and insert it into the + * snapshot list. + * + * If you remove the VF, you must also delete the associated VF info structure + * from the linked list. */ void ice_mbx_init_vf_info(struct ice_hw *hw, struct ice_mbx_vf_info *vf_info) { - vf_info->malicious = 0; + struct ice_mbx_snapshot *snap = &hw->mbx_snapshot; + + ice_mbx_clear_malvf(vf_info); + list_add(&vf_info->list_entry, &snap->mbx_vf); } /** - * ice_mbx_init_snapshot - Initialize mailbox snapshot structure + * ice_mbx_init_snapshot - Initialize mailbox snapshot data * @hw: pointer to the hardware structure - * @vf_count: number of VFs allocated on a PF - * - * Clear the mailbox snapshot structure and allocate memory - * for the VF counter array based on the number of VFs allocated - * on that PF. * - * Assumption: This function will assume ice_get_caps() has already been - * called to ensure that the vf_count can be compared against the number - * of VFs supported as defined in the functional capabilities of the device. + * Clear the mailbox snapshot structure and initialize the VF mailbox list. */ -int ice_mbx_init_snapshot(struct ice_hw *hw, u16 vf_count) +void ice_mbx_init_snapshot(struct ice_hw *hw) { struct ice_mbx_snapshot *snap = &hw->mbx_snapshot; - /* Ensure that the number of VFs allocated is non-zero and - * is not greater than the number of supported VFs defined in - * the functional capabilities of the PF. - */ - if (!vf_count || vf_count > hw->func_caps.num_allocd_vfs) - return -EINVAL; - - snap->mbx_vf.vf_cntr = devm_kcalloc(ice_hw_to_dev(hw), vf_count, - sizeof(*snap->mbx_vf.vf_cntr), - GFP_KERNEL); - if (!snap->mbx_vf.vf_cntr) - return -ENOMEM; - - /* Setting the VF counter length to the number of allocated - * VFs for given PF's functional capabilities. - */ - snap->mbx_vf.vfcntr_len = vf_count; - - /* Clear mbx_buf in the mailbox snaphot structure and setting the - * mailbox snapshot state to a new capture. 
- */ - memset(&snap->mbx_buf, 0, sizeof(snap->mbx_buf)); - snap->mbx_buf.state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT; - - return 0; + INIT_LIST_HEAD(&snap->mbx_vf); + ice_mbx_reset_snapshot(snap); } /** @@ -463,10 +410,6 @@ void ice_mbx_deinit_snapshot(struct ice_hw *hw) { struct ice_mbx_snapshot *snap = &hw->mbx_snapshot; - /* Free VF counter array and reset VF counter length */ - devm_kfree(ice_hw_to_dev(hw), snap->mbx_vf.vf_cntr); - snap->mbx_vf.vfcntr_len = 0; - /* Clear mbx_buf in the mailbox snaphot structure */ memset(&snap->mbx_buf, 0, sizeof(snap->mbx_buf)); } diff --git a/drivers/net/ethernet/intel/ice/ice_vf_mbx.h b/drivers/net/ethernet/intel/ice/ice_vf_mbx.h index 2613cba61ac7d..a6d42f467dc57 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_mbx.h +++ b/drivers/net/ethernet/intel/ice/ice_vf_mbx.h @@ -21,12 +21,10 @@ ice_aq_send_msg_to_vf(struct ice_hw *hw, u16 vfid, u32 v_opcode, u32 v_retval, u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed); int ice_mbx_vf_state_handler(struct ice_hw *hw, struct ice_mbx_data *mbx_data, - u16 vf_id, bool *is_mal_vf); -void -ice_mbx_clear_malvf(struct ice_mbx_snapshot *snap, u16 vf_id, - struct ice_mbx_vf_info *vf_info); + struct ice_mbx_vf_info *vf_info, bool *is_mal_vf); +void ice_mbx_clear_malvf(struct ice_mbx_vf_info *vf_info); void ice_mbx_init_vf_info(struct ice_hw *hw, struct ice_mbx_vf_info *vf_info); -int ice_mbx_init_snapshot(struct ice_hw *hw, u16 vf_count); +void ice_mbx_init_snapshot(struct ice_hw *hw); void ice_mbx_deinit_snapshot(struct ice_hw *hw); int ice_mbx_report_malvf(struct ice_hw *hw, struct ice_mbx_vf_info *vf_info, -- GitLab From 4bdf5f258331f049bbff2d770cfcb62f6b789dfe Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 22 Feb 2023 09:09:11 -0800 Subject: [PATCH 0385/2078] ice: remove ice_mbx_deinit_snapshot The ice_mbx_deinit_snapshot function's only remaining job is to clear the previous snapshot data. This snapshot data is initialized when SR-IOV adds VFs, so it is not necessary to clear this data when removing VFs. Since no allocation occurs we no longer need to free anything and we can safely remove this function. 
Signed-off-by: Jacob Keller Reviewed-by: Michal Swiatkowski Tested-by: Marek Szlosek Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_sriov.c | 5 +---- drivers/net/ethernet/intel/ice/ice_vf_mbx.c | 14 -------------- drivers/net/ethernet/intel/ice/ice_vf_mbx.h | 1 - 3 files changed, 1 insertion(+), 19 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 8820f269bfdf4..b65025b515269 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -1014,7 +1014,6 @@ int ice_sriov_configure(struct pci_dev *pdev, int num_vfs) if (!num_vfs) { if (!pci_vfs_assigned(pdev)) { ice_free_vfs(pf); - ice_mbx_deinit_snapshot(&pf->hw); if (pf->lag) ice_enable_lag(pf->lag); return 0; @@ -1027,10 +1026,8 @@ int ice_sriov_configure(struct pci_dev *pdev, int num_vfs) ice_mbx_init_snapshot(&pf->hw); err = ice_pci_sriov_ena(pf, num_vfs); - if (err) { - ice_mbx_deinit_snapshot(&pf->hw); + if (err) return err; - } if (pf->lag) ice_disable_lag(pf->lag); diff --git a/drivers/net/ethernet/intel/ice/ice_vf_mbx.c b/drivers/net/ethernet/intel/ice/ice_vf_mbx.c index 4bfed5fb3a885..1f332ab43b00f 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_mbx.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_mbx.c @@ -399,17 +399,3 @@ void ice_mbx_init_snapshot(struct ice_hw *hw) INIT_LIST_HEAD(&snap->mbx_vf); ice_mbx_reset_snapshot(snap); } - -/** - * ice_mbx_deinit_snapshot - Free mailbox snapshot structure - * @hw: pointer to the hardware structure - * - * Clear the mailbox snapshot structure and free the VF counter array. - */ -void ice_mbx_deinit_snapshot(struct ice_hw *hw) -{ - struct ice_mbx_snapshot *snap = &hw->mbx_snapshot; - - /* Clear mbx_buf in the mailbox snaphot structure */ - memset(&snap->mbx_buf, 0, sizeof(snap->mbx_buf)); -} diff --git a/drivers/net/ethernet/intel/ice/ice_vf_mbx.h b/drivers/net/ethernet/intel/ice/ice_vf_mbx.h index a6d42f467dc57..e4bdd93ccef14 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_mbx.h +++ b/drivers/net/ethernet/intel/ice/ice_vf_mbx.h @@ -25,7 +25,6 @@ ice_mbx_vf_state_handler(struct ice_hw *hw, struct ice_mbx_data *mbx_data, void ice_mbx_clear_malvf(struct ice_mbx_vf_info *vf_info); void ice_mbx_init_vf_info(struct ice_hw *hw, struct ice_mbx_vf_info *vf_info); void ice_mbx_init_snapshot(struct ice_hw *hw); -void ice_mbx_deinit_snapshot(struct ice_hw *hw); int ice_mbx_report_malvf(struct ice_hw *hw, struct ice_mbx_vf_info *vf_info, bool *report_malvf); -- GitLab From 07cc1a942216d1f211f1c641af8b6f810bb16699 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 22 Feb 2023 09:09:12 -0800 Subject: [PATCH 0386/2078] ice: merge ice_mbx_report_malvf with ice_mbx_vf_state_handler The ice_mbx_report_malvf function is used to update the ice_mbx_vf_info.malicious member after we detect a malicious VF. This is done by calling ice_mbx_report_malvf after ice_mbx_vf_state_handler sets its "is_malvf" return parameter true. Instead of requiring two steps, directly update the malicious bit in the state handler, and remove the need for separately calling ice_mbx_report_malvf. 
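The merged behaviour can be reduced to a few lines: the state handler itself latches the malicious bit and asks the caller to warn only on the first detection, so no separate reporting call is needed. The sketch below is illustrative only; the threshold value and names do not come from the driver.

    #include <stdbool.h>
    #include <stdio.h>

    struct vf_info {
        unsigned int msg_count;
        bool malicious;
    };

    /* Latches vf->malicious and reports only on the first detection. */
    static void state_handler(struct vf_info *vf, bool over_watermark,
                              bool *report_malvf)
    {
        bool is_malvf = false;

        *report_malvf = false;

        if (over_watermark && ++vf->msg_count >= 63) /* placeholder threshold */
            is_malvf = true;

        if (is_malvf && !vf->malicious) {
            vf->malicious = true;
            *report_malvf = true;
        }
    }

    int main(void)
    {
        struct vf_info vf = { 0 };
        bool report;

        for (int i = 0; i < 130; i++) {
            state_handler(&vf, true, &report);
            if (report)
                printf("warn once, at message %d\n", i + 1);
        }
        return 0;
    }
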
Signed-off-by: Jacob Keller Reviewed-by: Michal Swiatkowski Tested-by: Marek Szlosek Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_sriov.c | 34 +++++--------- drivers/net/ethernet/intel/ice/ice_vf_mbx.c | 51 ++++++--------------- drivers/net/ethernet/intel/ice/ice_vf_mbx.h | 5 +- 3 files changed, 28 insertions(+), 62 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index b65025b515269..6152c90d7286d 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -1794,7 +1794,7 @@ ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event, s16 vf_id = le16_to_cpu(event->desc.retval); struct device *dev = ice_pf_to_dev(pf); struct ice_mbx_data mbxdata; - bool malvf = false; + bool report_malvf = false; struct ice_vf *vf; int status; @@ -1811,33 +1811,23 @@ ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event, #define ICE_MBX_OVERFLOW_WATERMARK 64 mbxdata.async_watermark_val = ICE_MBX_OVERFLOW_WATERMARK; - /* check to see if we have a malicious VF */ - status = ice_mbx_vf_state_handler(&pf->hw, &mbxdata, &vf->mbx_info, &malvf); + /* check to see if we have a newly malicious VF */ + status = ice_mbx_vf_state_handler(&pf->hw, &mbxdata, &vf->mbx_info, + &report_malvf); if (status) goto out_put_vf; - if (malvf) { - bool report_vf = false; + if (report_malvf) { + struct ice_vsi *pf_vsi = ice_get_main_vsi(pf); - /* if the VF is malicious and we haven't let the user - * know about it, then let them know now - */ - status = ice_mbx_report_malvf(&pf->hw, &vf->mbx_info, - &report_vf); - if (status) - dev_dbg(dev, "Error reporting malicious VF\n"); - - if (report_vf) { - struct ice_vsi *pf_vsi = ice_get_main_vsi(pf); - - if (pf_vsi) - dev_warn(dev, "VF MAC %pM on PF MAC %pM is generating asynchronous messages and may be overflowing the PF message queue. Please see the Adapter User Guide for more information\n", - &vf->dev_lan_addr[0], - pf_vsi->netdev->dev_addr); - } + if (pf_vsi) + dev_warn(dev, "VF MAC %pM on PF MAC %pM is generating asynchronous messages and may be overflowing the PF message queue. Please see the Adapter User Guide for more information\n", + &vf->dev_lan_addr[0], + pf_vsi->netdev->dev_addr); } out_put_vf: ice_put_vf(vf); - return malvf; + + return vf->mbx_info.malicious; } diff --git a/drivers/net/ethernet/intel/ice/ice_vf_mbx.c b/drivers/net/ethernet/intel/ice/ice_vf_mbx.c index 1f332ab43b00f..40cb4ba0789ce 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_mbx.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_mbx.c @@ -215,7 +215,7 @@ ice_mbx_detect_malvf(struct ice_hw *hw, struct ice_mbx_vf_info *vf_info, * @hw: pointer to the HW struct * @mbx_data: pointer to structure containing mailbox data * @vf_info: mailbox tracking structure for the VF in question - * @is_malvf: boolean output to indicate if VF is malicious + * @report_malvf: boolean output to indicate whether VF should be reported * * The function serves as an entry point for the malicious VF * detection algorithm by handling the different states and state @@ -234,25 +234,24 @@ ice_mbx_detect_malvf(struct ice_hw *hw, struct ice_mbx_vf_info *vf_info, * the static snapshot and look for a malicious VF. 
*/ int -ice_mbx_vf_state_handler(struct ice_hw *hw, - struct ice_mbx_data *mbx_data, - struct ice_mbx_vf_info *vf_info, - bool *is_malvf) +ice_mbx_vf_state_handler(struct ice_hw *hw, struct ice_mbx_data *mbx_data, + struct ice_mbx_vf_info *vf_info, bool *report_malvf) { struct ice_mbx_snapshot *snap = &hw->mbx_snapshot; struct ice_mbx_snap_buffer_data *snap_buf; struct ice_ctl_q_info *cq = &hw->mailboxq; enum ice_mbx_snapshot_state new_state; + bool is_malvf = false; int status = 0; - if (!is_malvf || !mbx_data) + if (!report_malvf || !mbx_data || !vf_info) return -EINVAL; + *report_malvf = false; + /* When entering the mailbox state machine assume that the VF * is not malicious until detected. */ - *is_malvf = false; - /* Checking if max messages allowed to be processed while servicing current * interrupt is not less than the defined AVF message threshold. */ @@ -301,8 +300,7 @@ ice_mbx_vf_state_handler(struct ice_hw *hw, if (snap_buf->num_pending_arq >= mbx_data->async_watermark_val) { new_state = ICE_MAL_VF_DETECT_STATE_DETECT; - status = ice_mbx_detect_malvf(hw, vf_info, &new_state, - is_malvf); + status = ice_mbx_detect_malvf(hw, vf_info, &new_state, &is_malvf); } else { new_state = ICE_MAL_VF_DETECT_STATE_TRAVERSE; ice_mbx_traverse(hw, &new_state); @@ -316,8 +314,7 @@ ice_mbx_vf_state_handler(struct ice_hw *hw, case ICE_MAL_VF_DETECT_STATE_DETECT: new_state = ICE_MAL_VF_DETECT_STATE_DETECT; - status = ice_mbx_detect_malvf(hw, vf_info, &new_state, - is_malvf); + status = ice_mbx_detect_malvf(hw, vf_info, &new_state, &is_malvf); break; default: @@ -327,31 +324,13 @@ ice_mbx_vf_state_handler(struct ice_hw *hw, snap_buf->state = new_state; - return status; -} - -/** - * ice_mbx_report_malvf - Track and note malicious VF - * @hw: pointer to the HW struct - * @vf_info: the mailbox tracking info structure for a VF - * @report_malvf: boolean to indicate if malicious VF must be reported - * - * This function updates the malicious indicator bit in the VF mailbox - * tracking structure. A malicious VF must be reported only once if discovered - * between VF resets or loading so the function first checks if the VF has - * already been detected in any previous mailbox iterations. 
- */ -int -ice_mbx_report_malvf(struct ice_hw *hw, struct ice_mbx_vf_info *vf_info, - bool *report_malvf) -{ - if (!report_malvf) - return -EINVAL; - - *report_malvf = !vf_info->malicious; - vf_info->malicious = 1; + /* Only report VFs as malicious the first time we detect it */ + if (is_malvf && !vf_info->malicious) { + vf_info->malicious = 1; + *report_malvf = true; + } - return 0; + return status; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_vf_mbx.h b/drivers/net/ethernet/intel/ice/ice_vf_mbx.h index e4bdd93ccef14..41250519bc565 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_mbx.h +++ b/drivers/net/ethernet/intel/ice/ice_vf_mbx.h @@ -21,13 +21,10 @@ ice_aq_send_msg_to_vf(struct ice_hw *hw, u16 vfid, u32 v_opcode, u32 v_retval, u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed); int ice_mbx_vf_state_handler(struct ice_hw *hw, struct ice_mbx_data *mbx_data, - struct ice_mbx_vf_info *vf_info, bool *is_mal_vf); + struct ice_mbx_vf_info *vf_info, bool *report_malvf); void ice_mbx_clear_malvf(struct ice_mbx_vf_info *vf_info); void ice_mbx_init_vf_info(struct ice_hw *hw, struct ice_mbx_vf_info *vf_info); void ice_mbx_init_snapshot(struct ice_hw *hw); -int -ice_mbx_report_malvf(struct ice_hw *hw, struct ice_mbx_vf_info *vf_info, - bool *report_malvf); #else /* CONFIG_PCI_IOV */ static inline int ice_aq_send_msg_to_vf(struct ice_hw __always_unused *hw, -- GitLab From dde7db637d9981b47da0da575661d0ec83f8b25a Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 22 Feb 2023 09:09:13 -0800 Subject: [PATCH 0387/2078] ice: initialize mailbox snapshot earlier in PF init Now that we no longer depend on the number of VFs being allocated, we can move the ice_mbx_init_snapshot function earlier. This will be required by Scalable IOV as we will not be calling ice_sriov_configure for Scalable VFs. 
Signed-off-by: Jacob Keller Reviewed-by: Michal Swiatkowski Tested-by: Marek Szlosek Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 1 + drivers/net/ethernet/intel/ice/ice_sriov.c | 2 -- drivers/net/ethernet/intel/ice/ice_vf_mbx.h | 4 ++++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 567694bf098ba..615a731d7afe0 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3891,6 +3891,7 @@ static int ice_init_pf(struct ice_pf *pf) mutex_init(&pf->vfs.table_lock); hash_init(pf->vfs.table); + ice_mbx_init_snapshot(&pf->hw); return 0; } diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 6152c90d7286d..79159cbb66ec6 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -1023,8 +1023,6 @@ int ice_sriov_configure(struct pci_dev *pdev, int num_vfs) return -EBUSY; } - ice_mbx_init_snapshot(&pf->hw); - err = ice_pci_sriov_ena(pf, num_vfs); if (err) return err; diff --git a/drivers/net/ethernet/intel/ice/ice_vf_mbx.h b/drivers/net/ethernet/intel/ice/ice_vf_mbx.h index 41250519bc565..44bc030d17e07 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_mbx.h +++ b/drivers/net/ethernet/intel/ice/ice_vf_mbx.h @@ -43,5 +43,9 @@ ice_conv_link_speed_to_virtchnl(bool __always_unused adv_link_support, return 0; } +static inline void ice_mbx_init_snapshot(struct ice_hw *hw) +{ +} + #endif /* CONFIG_PCI_IOV */ #endif /* _ICE_VF_MBX_H_ */ -- GitLab From 33b035e70611c10c5aa3864a7517570b25a46ebb Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 22 Feb 2023 09:09:14 -0800 Subject: [PATCH 0388/2078] ice: declare ice_vc_process_vf_msg in ice_virtchnl.h The ice_vc_process_vf_msg function is the main entry point for handling virtchnl messages. This function is defined in ice_virtchnl.c but its declaration is still in ice_sriov.c The ice_sriov.c file used to contain all of the virtualization logic until commit bf93bf791cec ("ice: introduce ice_virtchnl.c and ice_virtchnl.h") moved the virtchnl logic to its own file. The ice_vc_process_vf_msg function should have had its declaration moved to ice_virtchnl.h then. Fix this. 
Signed-off-by: Jacob Keller Reviewed-by: Michal Swiatkowski Tested-by: Marek Szlosek Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_sriov.h | 3 --- drivers/net/ethernet/intel/ice/ice_virtchnl.h | 6 ++++++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.h b/drivers/net/ethernet/intel/ice/ice_sriov.h index 955ab810a198c..1082b0691a3f6 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.h +++ b/drivers/net/ethernet/intel/ice/ice_sriov.h @@ -33,7 +33,6 @@ int ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi); void ice_free_vfs(struct ice_pf *pf); -void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event); void ice_restore_all_vfs_msi_state(struct pci_dev *pdev); bool ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event, @@ -68,8 +67,6 @@ ice_vc_validate_pattern(struct ice_vf *vf, struct virtchnl_proto_hdrs *proto); static inline void ice_process_vflr_event(struct ice_pf *pf) { } static inline void ice_free_vfs(struct ice_pf *pf) { } static inline -void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event) { } -static inline void ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event) { } static inline void ice_print_vfs_mdd_events(struct ice_pf *pf) { } static inline void ice_print_vf_rx_mdd_event(struct ice_vf *vf) { } diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.h b/drivers/net/ethernet/intel/ice/ice_virtchnl.h index b454654d7b0c6..6d5af29c855ea 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.h +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.h @@ -63,6 +63,7 @@ int ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode, enum virtchnl_status_code v_retval, u8 *msg, u16 msglen); bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id); +void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event); #else /* CONFIG_PCI_IOV */ static inline void ice_virtchnl_set_dflt_ops(struct ice_vf *vf) { } static inline void ice_virtchnl_set_repr_ops(struct ice_vf *vf) { } @@ -81,6 +82,11 @@ static inline bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id) { return false; } + +static inline void +ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event) +{ +} #endif /* !CONFIG_PCI_IOV */ #endif /* _ICE_VIRTCHNL_H_ */ -- GitLab From 4f0636fef61ad0b22fed4fb06f369d6ba38f807d Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 22 Feb 2023 09:09:15 -0800 Subject: [PATCH 0389/2078] ice: always report VF overflowing mailbox even without PF VSI In ice_is_malicious_vf we report a message warning the system administrator when a VF is potentially spamming the PF with asynchronous messages that could overflow the PF mailbox. The specific message was requested by our customer support team to include the VF and PF MAC address. In some cases we may not be able to locate the PF VSI to obtain the MAC address for the PF. The current implementation discards the message entirely in this case. Fix this to instead print a zero address in that case so that we always print something here. Note that dev_warn will also include the PCI device information allowing another mechanism for determining on which PF the potentially malicious VF belongs. 
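The fallback amounts to substituting an all-zero MAC when the PF address cannot be resolved, so the warning is always emitted. A small standalone model of that pattern follows; the helper names and message text are hypothetical, not the driver's.

    #include <stdio.h>

    #define ETH_ALEN 6

    static void print_mac(const unsigned char *mac)
    {
        printf("%02x:%02x:%02x:%02x:%02x:%02x",
               mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
    }

    /* Warn about an overflowing VF even if the PF MAC is unknown. */
    static void warn_overflowing_vf(const unsigned char *vf_mac,
                                    const unsigned char *pf_mac) /* may be NULL */
    {
        unsigned char zero_addr[ETH_ALEN] = { 0 };

        printf("VF MAC ");
        print_mac(vf_mac);
        printf(" on PF MAC ");
        print_mac(pf_mac ? pf_mac : zero_addr);
        printf(" may be overflowing the PF message queue\n");
    }

    int main(void)
    {
        unsigned char vf_mac[ETH_ALEN] = { 0x02, 0, 0, 0, 0, 0x01 };

        warn_overflowing_vf(vf_mac, NULL); /* PF MAC prints as all zeros */
        return 0;
    }
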
Signed-off-by: Jacob Keller Reviewed-by: Michal Swiatkowski Tested-by: Marek Szlosek Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_sriov.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 79159cbb66ec6..185673afb7819 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -1817,11 +1817,11 @@ ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event, if (report_malvf) { struct ice_vsi *pf_vsi = ice_get_main_vsi(pf); + u8 zero_addr[ETH_ALEN] = {}; - if (pf_vsi) - dev_warn(dev, "VF MAC %pM on PF MAC %pM is generating asynchronous messages and may be overflowing the PF message queue. Please see the Adapter User Guide for more information\n", - &vf->dev_lan_addr[0], - pf_vsi->netdev->dev_addr); + dev_warn(dev, "VF MAC %pM on PF MAC %pM is generating asynchronous messages and may be overflowing the PF message queue. Please see the Adapter User Guide for more information\n", + &vf->dev_lan_addr[0], + pf_vsi ? pf_vsi->netdev->dev_addr : zero_addr); } out_put_vf: -- GitLab From 3f22fc3131b814f0d5d1ce6d94fb8239e6df1754 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 22 Feb 2023 09:09:16 -0800 Subject: [PATCH 0390/2078] ice: remove unnecessary &array[0] and just use array In ice_is_malicious_vf we print the VF MAC address using %pM by passing the address of the first element of vf->dev_lan_addr. This is equivalent to just passing vf->dev_lan_addr, so do that. Signed-off-by: Jacob Keller Reviewed-by: Michal Swiatkowski Tested-by: Marek Szlosek Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_sriov.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 185673afb7819..938be486721ec 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -1820,7 +1820,7 @@ ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event, u8 zero_addr[ETH_ALEN] = {}; dev_warn(dev, "VF MAC %pM on PF MAC %pM is generating asynchronous messages and may be overflowing the PF message queue. Please see the Adapter User Guide for more information\n", - &vf->dev_lan_addr[0], + vf->dev_lan_addr, pf_vsi ? pf_vsi->netdev->dev_addr : zero_addr); } -- GitLab From afc24d6584fbd246d98c0feb464b94da67661e3e Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 22 Feb 2023 09:09:17 -0800 Subject: [PATCH 0391/2078] ice: pass mbxdata to ice_is_malicious_vf() The ice_is_malicious_vf() function takes information about the current state of the mailbox during a single interrupt. This information includes the number of messages processed so far, as well as the number of pending messages not yet processed. A future refactor is going to make ice_vc_process_vf_msg() call ice_is_malicious_vf() instead of having it called separately in ice_main.c This change will require passing all the necessary arguments into ice_vc_process_vf_msg(). To make this simpler, have the main loop fill in the struct ice_mbx_data and pass that rather than passing in the num_msg_proc and num_msg_pending. 
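In rough outline, the control-queue loop now fills one descriptor of mailbox state per message and hands it to the handler instead of passing two bare counters. The sketch below mirrors the field names from the commit text; the watermark value and the handler are stand-ins, not the driver's code.

    #include <stdio.h>

    struct mbx_data {
        unsigned int num_msg_proc;       /* messages handled so far this interrupt */
        unsigned int num_pending_arq;    /* messages still waiting in the queue */
        unsigned int max_num_msgs_mbx;   /* receive queue depth */
        unsigned int async_watermark_val;
    };

    /* Stand-in for the per-message handler that consumes the mailbox state. */
    static void handle_vf_msg(const struct mbx_data *data)
    {
        printf("msg %u of backlog %u (queue depth %u, watermark %u)\n",
               data->num_msg_proc, data->num_pending_arq,
               data->max_num_msgs_mbx, data->async_watermark_val);
    }

    static void clean_mailbox(unsigned int rq_entries, unsigned int pending)
    {
        for (unsigned int i = 0; i < pending; i++) {
            struct mbx_data data = {
                .num_msg_proc = i,
                .num_pending_arq = pending,
                .max_num_msgs_mbx = rq_entries,
                .async_watermark_val = 64, /* placeholder watermark */
            };

            handle_vf_msg(&data);
        }
    }

    int main(void)
    {
        clean_mailbox(1024, 3);
        return 0;
    }
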
Signed-off-by: Jacob Keller Reviewed-by: Michal Swiatkowski Tested-by: Marek Szlosek Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 10 +++++++++- drivers/net/ethernet/intel/ice/ice_sriov.c | 14 +++----------- drivers/net/ethernet/intel/ice/ice_sriov.h | 5 ++--- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 615a731d7afe0..a7e7a186009e1 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -1393,6 +1393,8 @@ static void ice_aq_cancel_waiting_tasks(struct ice_pf *pf) wake_up(&pf->aq_wait_queue); } +#define ICE_MBX_OVERFLOW_WATERMARK 64 + /** * __ice_clean_ctrlq - helper function to clean controlq rings * @pf: ptr to struct ice_pf @@ -1483,6 +1485,7 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) return 0; do { + struct ice_mbx_data data = {}; u16 opcode; int ret; @@ -1509,7 +1512,12 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) ice_vf_lan_overflow_event(pf, &event); break; case ice_mbx_opc_send_msg_to_pf: - if (!ice_is_malicious_vf(pf, &event, i, pending)) + data.num_msg_proc = i; + data.num_pending_arq = pending; + data.max_num_msgs_mbx = hw->mailboxq.num_rq_entries; + data.async_watermark_val = ICE_MBX_OVERFLOW_WATERMARK; + + if (!ice_is_malicious_vf(pf, &event, &data)) ice_vc_process_vf_msg(pf, &event); break; case ice_aqc_opc_fw_logging: diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 938be486721ec..5ae923ea979c6 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -1782,16 +1782,14 @@ void ice_restore_all_vfs_msi_state(struct pci_dev *pdev) * ice_is_malicious_vf - helper function to detect a malicious VF * @pf: ptr to struct ice_pf * @event: pointer to the AQ event - * @num_msg_proc: the number of messages processed so far - * @num_msg_pending: the number of messages peinding in admin queue + * @mbxdata: data about the state of the mailbox */ bool ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event, - u16 num_msg_proc, u16 num_msg_pending) + struct ice_mbx_data *mbxdata) { s16 vf_id = le16_to_cpu(event->desc.retval); struct device *dev = ice_pf_to_dev(pf); - struct ice_mbx_data mbxdata; bool report_malvf = false; struct ice_vf *vf; int status; @@ -1803,14 +1801,8 @@ ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event, if (test_bit(ICE_VF_STATE_DIS, vf->vf_states)) goto out_put_vf; - mbxdata.num_msg_proc = num_msg_proc; - mbxdata.num_pending_arq = num_msg_pending; - mbxdata.max_num_msgs_mbx = pf->hw.mailboxq.num_rq_entries; -#define ICE_MBX_OVERFLOW_WATERMARK 64 - mbxdata.async_watermark_val = ICE_MBX_OVERFLOW_WATERMARK; - /* check to see if we have a newly malicious VF */ - status = ice_mbx_vf_state_handler(&pf->hw, &mbxdata, &vf->mbx_info, + status = ice_mbx_vf_state_handler(&pf->hw, mbxdata, &vf->mbx_info, &report_malvf); if (status) goto out_put_vf; diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.h b/drivers/net/ethernet/intel/ice/ice_sriov.h index 1082b0691a3f6..8fa61d954fae4 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.h +++ b/drivers/net/ethernet/intel/ice/ice_sriov.h @@ -36,7 +36,7 @@ void ice_free_vfs(struct ice_pf *pf); void ice_restore_all_vfs_msi_state(struct pci_dev *pdev); bool ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event, - u16 num_msg_proc, u16 
num_msg_pending); + struct ice_mbx_data *mbxdata); int ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, @@ -75,8 +75,7 @@ static inline void ice_restore_all_vfs_msi_state(struct pci_dev *pdev) { } static inline bool ice_is_malicious_vf(struct ice_pf __always_unused *pf, struct ice_rq_event_info __always_unused *event, - u16 __always_unused num_msg_proc, - u16 __always_unused num_msg_pending) + struct ice_mbx_data *mbxdata) { return false; } -- GitLab From 4508bf02bf8a3de8fb65869f40dfdef74dc1b339 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 22 Feb 2023 09:09:18 -0800 Subject: [PATCH 0392/2078] ice: print message if ice_mbx_vf_state_handler returns an error If ice_mbx_vf_state_handler() returns an error, the ice_is_malicious_vf() function just exits without printing anything. Instead, use dev_warn_ratelimited to print a warning that we were unable to check the status for this VF. The _ratelimited variant is used to avoid potentially spamming the log if this function is failing consistently for every single mailbox message. Also we can drop the "goto" as it simply skips over a report_malvf check. That variable should always be false if ice_mbx_vf_state_handler returns non-zero. Signed-off-by: Jacob Keller Reviewed-by: Michal Swiatkowski Tested-by: Marek Szlosek Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_sriov.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 5ae923ea979c6..f0daeda236def 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -1805,7 +1805,8 @@ ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event, status = ice_mbx_vf_state_handler(&pf->hw, mbxdata, &vf->mbx_info, &report_malvf); if (status) - goto out_put_vf; + dev_warn_ratelimited(dev, "Unable to check status of mailbox overflow for VF %u MAC %pM, status %d\n", + vf->vf_id, vf->dev_lan_addr, status); if (report_malvf) { struct ice_vsi *pf_vsi = ice_get_main_vsi(pf); -- GitLab From c414463ab1bb098e67f4c1a4ef64f3e97780f087 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 22 Feb 2023 09:09:19 -0800 Subject: [PATCH 0393/2078] ice: move ice_is_malicious_vf() to ice_virtchnl.c The ice_is_malicious_vf() function is currently implemented in ice_sriov.c This function is not Single Root specific, and a future change is going to refactor the ice_vc_process_vf_msg() function to call this instead of calling it before ice_vc_process_vf_msg() in the main loop of __ice_clean_ctrlq. To make that change easier to review, first move this function into ice_virtchnl.c but leave the call in __ice_clean_ctrlq() alone. 
Signed-off-by: Jacob Keller Reviewed-by: Michal Swiatkowski Tested-by: Marek Szlosek Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_sriov.c | 45 ------------------- drivers/net/ethernet/intel/ice/ice_sriov.h | 11 ----- drivers/net/ethernet/intel/ice/ice_virtchnl.c | 45 +++++++++++++++++++ drivers/net/ethernet/intel/ice/ice_virtchnl.h | 11 +++++ 4 files changed, 56 insertions(+), 56 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index f0daeda236def..6fa62c3cedb06 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -1777,48 +1777,3 @@ void ice_restore_all_vfs_msi_state(struct pci_dev *pdev) } } } - -/** - * ice_is_malicious_vf - helper function to detect a malicious VF - * @pf: ptr to struct ice_pf - * @event: pointer to the AQ event - * @mbxdata: data about the state of the mailbox - */ -bool -ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event, - struct ice_mbx_data *mbxdata) -{ - s16 vf_id = le16_to_cpu(event->desc.retval); - struct device *dev = ice_pf_to_dev(pf); - bool report_malvf = false; - struct ice_vf *vf; - int status; - - vf = ice_get_vf_by_id(pf, vf_id); - if (!vf) - return false; - - if (test_bit(ICE_VF_STATE_DIS, vf->vf_states)) - goto out_put_vf; - - /* check to see if we have a newly malicious VF */ - status = ice_mbx_vf_state_handler(&pf->hw, mbxdata, &vf->mbx_info, - &report_malvf); - if (status) - dev_warn_ratelimited(dev, "Unable to check status of mailbox overflow for VF %u MAC %pM, status %d\n", - vf->vf_id, vf->dev_lan_addr, status); - - if (report_malvf) { - struct ice_vsi *pf_vsi = ice_get_main_vsi(pf); - u8 zero_addr[ETH_ALEN] = {}; - - dev_warn(dev, "VF MAC %pM on PF MAC %pM is generating asynchronous messages and may be overflowing the PF message queue. Please see the Adapter User Guide for more information\n", - vf->dev_lan_addr, - pf_vsi ? 
pf_vsi->netdev->dev_addr : zero_addr); - } - -out_put_vf: - ice_put_vf(vf); - - return vf->mbx_info.malicious; -} diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.h b/drivers/net/ethernet/intel/ice/ice_sriov.h index 8fa61d954fae4..346cb2666f3a6 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.h +++ b/drivers/net/ethernet/intel/ice/ice_sriov.h @@ -34,9 +34,6 @@ ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi); void ice_free_vfs(struct ice_pf *pf); void ice_restore_all_vfs_msi_state(struct pci_dev *pdev); -bool -ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event, - struct ice_mbx_data *mbxdata); int ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, @@ -72,14 +69,6 @@ static inline void ice_print_vfs_mdd_events(struct ice_pf *pf) { } static inline void ice_print_vf_rx_mdd_event(struct ice_vf *vf) { } static inline void ice_restore_all_vfs_msi_state(struct pci_dev *pdev) { } -static inline bool -ice_is_malicious_vf(struct ice_pf __always_unused *pf, - struct ice_rq_event_info __always_unused *event, - struct ice_mbx_data *mbxdata) -{ - return false; -} - static inline int ice_sriov_configure(struct pci_dev __always_unused *pdev, int __always_unused num_vfs) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index e24e3f5017ca6..e0c573d9d1b9b 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -3833,6 +3833,51 @@ void ice_virtchnl_set_repr_ops(struct ice_vf *vf) vf->virtchnl_ops = &ice_virtchnl_repr_ops; } +/** + * ice_is_malicious_vf - helper function to detect a malicious VF + * @pf: ptr to struct ice_pf + * @event: pointer to the AQ event + * @mbxdata: data about the state of the mailbox + */ +bool +ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event, + struct ice_mbx_data *mbxdata) +{ + s16 vf_id = le16_to_cpu(event->desc.retval); + struct device *dev = ice_pf_to_dev(pf); + bool report_malvf = false; + struct ice_vf *vf; + int status; + + vf = ice_get_vf_by_id(pf, vf_id); + if (!vf) + return false; + + if (test_bit(ICE_VF_STATE_DIS, vf->vf_states)) + goto out_put_vf; + + /* check to see if we have a newly malicious VF */ + status = ice_mbx_vf_state_handler(&pf->hw, mbxdata, &vf->mbx_info, + &report_malvf); + if (status) + dev_warn_ratelimited(dev, "Unable to check status of mailbox overflow for VF %u MAC %pM, status %d\n", + vf->vf_id, vf->dev_lan_addr, status); + + if (report_malvf) { + struct ice_vsi *pf_vsi = ice_get_main_vsi(pf); + u8 zero_addr[ETH_ALEN] = {}; + + dev_warn(dev, "VF MAC %pM on PF MAC %pM is generating asynchronous messages and may be overflowing the PF message queue. Please see the Adapter User Guide for more information\n", + vf->dev_lan_addr, + pf_vsi ? 
pf_vsi->netdev->dev_addr : zero_addr); + } + +out_put_vf: + ice_put_vf(vf); + + return vf->mbx_info.malicious; +} + /** * ice_vc_process_vf_msg - Process request from VF * @pf: pointer to the PF structure diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.h b/drivers/net/ethernet/intel/ice/ice_virtchnl.h index 6d5af29c855ea..648a383fad853 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.h +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.h @@ -63,6 +63,9 @@ int ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode, enum virtchnl_status_code v_retval, u8 *msg, u16 msglen); bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id); +bool +ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event, + struct ice_mbx_data *mbxdata); void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event); #else /* CONFIG_PCI_IOV */ static inline void ice_virtchnl_set_dflt_ops(struct ice_vf *vf) { } @@ -83,6 +86,14 @@ static inline bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id) return false; } +static inline bool +ice_is_malicious_vf(struct ice_pf __always_unused *pf, + struct ice_rq_event_info __always_unused *event, + struct ice_mbx_data *mbxdata) +{ + return false; +} + static inline void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event) { -- GitLab From be96815c616822d3800405b8fbebe3e069d6eed2 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 22 Feb 2023 09:09:20 -0800 Subject: [PATCH 0394/2078] ice: call ice_is_malicious_vf() from ice_vc_process_vf_msg() The main loop in __ice_clean_ctrlq first checks if a VF might be malicious before calling ice_vc_process_vf_msg(). This results in duplicate code in both functions to obtain a reference to the VF, and exports the ice_is_malicious_vf() from ice_virtchnl.c unnecessarily. Refactor ice_is_malicious_vf() to be a static function that takes a pointer to the VF. Call this in ice_vc_process_vf_msg() just after we obtain a reference to the VF by calling ice_get_vf_by_id. Pass the mailbox data from the __ice_clean_ctrlq function into ice_vc_process_vf_msg() instead of calling ice_is_malicious_vf(). This reduces the number of exported functions and avoids the need to obtain the VF reference twice for every mailbox message. Note that the state check for ICE_VF_STATE_DIS is kept in ice_is_malicious_vf() and we call this before checking that state in ice_vc_process_vf_msg. This is intentional, as we stop responding to VF messages from a VF once we detect that it may be overflowing the mailbox. This ensures that we continue to silently ignore the message as before without responding via ice_vc_send_msg_to_vf(). 
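Reduced to its control structure, the resulting flow looks roughly like the sketch below: look the VF up once, run the overflow check first, and keep silently ignoring messages from a disabled or already-flagged VF instead of replying. The helpers here are placeholders for the driver's functions, not their real signatures.

    #include <stdbool.h>
    #include <stdio.h>

    struct vf {
        bool disabled;
        bool malicious;
    };

    /* Stand-in for the overflow detector; assume it latches vf->malicious. */
    static bool is_malicious_vf(struct vf *vf)
    {
        return vf->malicious;
    }

    /* Stand-in for normal virtchnl dispatch. */
    static void dispatch_virtchnl_op(struct vf *vf)
    {
        (void)vf;
        printf("message processed\n");
    }

    static void process_vf_msg(struct vf *vf)
    {
        if (is_malicious_vf(vf))
            return;                 /* drop silently, never reply */
        if (vf->disabled)
            return;                 /* likewise ignore a disabled VF */
        dispatch_virtchnl_op(vf);
    }

    int main(void)
    {
        struct vf good = { 0 }, bad = { .malicious = true };

        process_vf_msg(&good); /* handled */
        process_vf_msg(&bad);  /* dropped without a reply */
        return 0;
    }
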
Signed-off-by: Jacob Keller Reviewed-by: Michal Swiatkowski Tested-by: Marek Szlosek Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 3 +- drivers/net/ethernet/intel/ice/ice_virtchnl.c | 36 ++++++++++--------- drivers/net/ethernet/intel/ice/ice_virtchnl.h | 17 +++------ 3 files changed, 24 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index a7e7a186009e1..20b3f3e6eda1c 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -1517,8 +1517,7 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) data.max_num_msgs_mbx = hw->mailboxq.num_rq_entries; data.async_watermark_val = ICE_MBX_OVERFLOW_WATERMARK; - if (!ice_is_malicious_vf(pf, &event, &data)) - ice_vc_process_vf_msg(pf, &event); + ice_vc_process_vf_msg(pf, &event, &data); break; case ice_aqc_opc_fw_logging: ice_output_fw_log(hw, &event.desc, event.msg_buf); diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index e0c573d9d1b9b..97243c616d5d6 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -3834,27 +3834,26 @@ void ice_virtchnl_set_repr_ops(struct ice_vf *vf) } /** - * ice_is_malicious_vf - helper function to detect a malicious VF - * @pf: ptr to struct ice_pf - * @event: pointer to the AQ event + * ice_is_malicious_vf - check if this vf might be overflowing mailbox + * @vf: the VF to check * @mbxdata: data about the state of the mailbox + * + * Detect if a given VF might be malicious and attempting to overflow the PF + * mailbox. If so, log a warning message and ignore this event. */ -bool -ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event, - struct ice_mbx_data *mbxdata) +static bool +ice_is_malicious_vf(struct ice_vf *vf, struct ice_mbx_data *mbxdata) { - s16 vf_id = le16_to_cpu(event->desc.retval); - struct device *dev = ice_pf_to_dev(pf); bool report_malvf = false; - struct ice_vf *vf; + struct device *dev; + struct ice_pf *pf; int status; - vf = ice_get_vf_by_id(pf, vf_id); - if (!vf) - return false; + pf = vf->pf; + dev = ice_pf_to_dev(pf); if (test_bit(ICE_VF_STATE_DIS, vf->vf_states)) - goto out_put_vf; + return vf->mbx_info.malicious; /* check to see if we have a newly malicious VF */ status = ice_mbx_vf_state_handler(&pf->hw, mbxdata, &vf->mbx_info, @@ -3872,9 +3871,6 @@ ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event, pf_vsi ? pf_vsi->netdev->dev_addr : zero_addr); } -out_put_vf: - ice_put_vf(vf); - return vf->mbx_info.malicious; } @@ -3882,11 +3878,13 @@ out_put_vf: * ice_vc_process_vf_msg - Process request from VF * @pf: pointer to the PF structure * @event: pointer to the AQ event + * @mbxdata: information used to detect VF attempting mailbox overflow * * called from the common asq/arq handler to * process request from VF */ -void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event) +void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event, + struct ice_mbx_data *mbxdata) { u32 v_opcode = le32_to_cpu(event->desc.cookie_high); s16 vf_id = le16_to_cpu(event->desc.retval); @@ -3908,6 +3906,10 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event) mutex_lock(&vf->cfg_lock); + /* Check if the VF is trying to overflow the mailbox */ + if (ice_is_malicious_vf(vf, mbxdata)) + goto finish; + /* Check if VF is disabled. 
*/ if (test_bit(ICE_VF_STATE_DIS, vf->vf_states)) { err = -EPERM; diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.h b/drivers/net/ethernet/intel/ice/ice_virtchnl.h index 648a383fad853..cd747718de738 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.h +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.h @@ -63,10 +63,8 @@ int ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode, enum virtchnl_status_code v_retval, u8 *msg, u16 msglen); bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id); -bool -ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event, - struct ice_mbx_data *mbxdata); -void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event); +void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event, + struct ice_mbx_data *mbxdata); #else /* CONFIG_PCI_IOV */ static inline void ice_virtchnl_set_dflt_ops(struct ice_vf *vf) { } static inline void ice_virtchnl_set_repr_ops(struct ice_vf *vf) { } @@ -86,16 +84,9 @@ static inline bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id) return false; } -static inline bool -ice_is_malicious_vf(struct ice_pf __always_unused *pf, - struct ice_rq_event_info __always_unused *event, - struct ice_mbx_data *mbxdata) -{ - return false; -} - static inline void -ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event) +ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event, + struct ice_mbx_data *mbxdata) { } #endif /* !CONFIG_PCI_IOV */ -- GitLab From 34f0677e7afd3a292bc1aadda7ce8e35faedb204 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 13 Mar 2023 11:40:17 -0700 Subject: [PATCH 0395/2078] bpf: fix precision propagation verbose logging Fix wrong order of frame index vs register/slot index in precision propagation verbose (level 2) output. It's wrong and very confusing as is. Fixes: 529409ea92d5 ("bpf: propagate precision across all frames, not just the last one") Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230313184017.4083374-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 519d465407ef6..883d4ff2e2883 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -15068,7 +15068,7 @@ static int propagate_precision(struct bpf_verifier_env *env, !(state_reg->live & REG_LIVE_READ)) continue; if (env->log.level & BPF_LOG_LEVEL2) - verbose(env, "frame %d: propagating r%d\n", i, fr); + verbose(env, "frame %d: propagating r%d\n", fr, i); err = mark_chain_precision_frame(env, fr, i); if (err < 0) return err; @@ -15084,7 +15084,7 @@ static int propagate_precision(struct bpf_verifier_env *env, continue; if (env->log.level & BPF_LOG_LEVEL2) verbose(env, "frame %d: propagating fp%d\n", - (-i - 1) * BPF_REG_SIZE, fr); + fr, (-i - 1) * BPF_REG_SIZE); err = mark_chain_precision_stack_frame(env, fr, i); if (err < 0) return err; -- GitLab From 22df776a9a866713d9decfb92b633bcfdb571954 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Wed, 15 Feb 2023 17:30:33 -0600 Subject: [PATCH 0396/2078] tasks: Extract rcu_users out of union In commit 3fbd7ee285b2b ("tasks: Add a count of task RCU users"), a count on the number of RCU users was added to struct task_struct. This was done so as to enable the removal of task_rcu_dereference(), and allow tasks to be protected by RCU even after exiting and being removed from the runqueue. 
In this commit, the 'refcount_t rcu_users' field that keeps track of this refcount was put into a union co-located with 'struct rcu_head rcu', so as to avoid taking up any extra space in task_struct. This was possible to do safely, because the field was only ever decremented by a static set of specific callers, and then never incremented again. While this restriction of there only being a small, static set of users of this field has worked fine, it prevents us from leveraging the field to use RCU to protect tasks in other contexts. During tracing, for example, it would be useful to be able to collect some tasks that performed a certain operation, put them in a map, and then periodically summarize who they are, which cgroup they're in, how much CPU time they've utilized, etc. While this can currently be done with 'usage', it becomes tricky when a task is already in a map, or if a reference should only be taken if a task is valid and will not soon be reaped. Ideally, we could do something like pass a reference to a map value, and then try to acquire a reference to the task in an RCU read region by using refcount_inc_not_zero(). Similarly, in sched_ext, schedulers are using integer pids to remember tasks, and then looking them up with find_task_by_pid_ns(). This is slow, error prone, and adds complexity. It would be more convenient and performant if BPF schedulers could instead store tasks directly in maps, and then leverage RCU to ensure they can be safely accessed with low overhead. Finally, overloading fields like this is error prone. Someone that wants to use 'rcu_users' could easily overlook the fact that once the rcu callback is scheduled, the refcount will go back to being nonzero, thus precluding the use of refcount_inc_not_zero(). Furthermore, as described below, it's possible to extract the fields of the union without changing the size of task_struct. There are several possible ways to enable this: 1. The lightest touch approach is likely the one proposed in this patch, which is to simply extract 'rcu_users' and 'rcu' from the union, so that scheduling the 'rcu' callback doesn't overwrite the 'rcu_users' refcount. If we have a trusted task pointer, this would allow us to use refcnt_inc_not_zero() inside of an RCU region to determine if we can safely acquire a reference to the task and store it in a map. As mentioned below, this can be done without changing the size of task_struct, by moving the location of the union to another location that has padding gaps we can fill in. 2. Removing 'refcount_t rcu_users', and instead having the entire task be freed in an rcu callback. This is likely the most sound overall design, though it changes the behavioral semantics exposed to callers, who currently expect that a task that's successfully looked up in e.g. the pid_list with find_task_by_pid_ns(), can always have a 'usage' reference acquired on them, as it's guaranteed to be > 0 until after the next gp. In order for this approach to work, we'd have to audit all callers. This approach also slightly changes behavior observed by user space by not invoking trace_sched_process_free() until the whole task_struct is actually being freed, rather than just after it's exited. It also may change timings, as memory will be freed in an RCU callback rather than immediately when the final 'usage' refcount drops to 0. This also is arguably a benefit, as it provides more predictable performance to callers who are refcounting tasks. 3. 
There may be other solutions as well that don't require changing the layout of task_struct. For example, we could possibly do something complex from the BPF side, such as listen for task exit and remove a task from a map when the task is exiting. This would likely require significant custom handling for task_struct in the verifier, so a more generalizable solution is likely warranted. As mentioned above, this patch proposes the lightest-touch approach which allows callers elsewhere in the kernel to use 'rcu_users' to ensure the lifetime of a task, by extracting 'rcu_users' and 'rcu' from the union. There is no size change in task_struct with this patch. Cc: Linus Torvalds Cc: Eric W. Biederman Cc: Peter Zijlstra Cc: Ingo Molnar Signed-off-by: David Vernet Acked-by: Oleg Nesterov Link: https://lore.kernel.org/r/20230215233033.889644-1-void@manifault.com Signed-off-by: Alexei Starovoitov --- include/linux/sched.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 63d242164b1a9..b11b4517760f1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1318,11 +1318,6 @@ struct task_struct { struct tlbflush_unmap_batch tlb_ubc; - union { - refcount_t rcu_users; - struct rcu_head rcu; - }; - /* Cache last used pipe for splice(): */ struct pipe_inode_info *splice_pipe; @@ -1459,6 +1454,8 @@ struct task_struct { unsigned long saved_state_change; # endif #endif + struct rcu_head rcu; + refcount_t rcu_users; int pagefault_disabled; #ifdef CONFIG_MMU struct task_struct *oom_reaper_list; -- GitLab From e4ed8ba08e3f1ef24771eb95f87da129ad752063 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 7 Mar 2023 22:44:02 +0100 Subject: [PATCH 0397/2078] net: phy: micrel: Add support for PTP_PF_PEROUT for lan8841 Lan8841 has 10 GPIOs and it has 2 events(EVENT_A and EVENT_B). It is possible to assigned the 2 events to any of the GPIOs, but a GPIO can have only 1 event at a time. These events are used to generate periodic signals. It is possible to configure the length, the start time and the period of the signal by configuring the event. Currently the SW uses only EVENT_A to generate the perout. These events are generated by comparing the target time with the PHC time. In case the PHC time is changed to a value bigger than the target time + reload time, then it would generate only 1 event and then it would stop because target time + reload time is small than PHC time. Therefore it is required to change also the target time every time when the PHC is changed. The same will apply also when the PHC time is changed to a smaller value. This was tested using: testptp -L 6,2 testptp -p 1000000000 -w 200000000 Signed-off-by: Horatiu Vultur Link: https://lore.kernel.org/r/20230307214402.793057-1-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/micrel.c | 391 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 389 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 2c84fccef4f64..e5b2af69ac033 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -318,6 +318,7 @@ struct kszphy_ptp_priv { struct ptp_clock_info ptp_clock_info; /* Lock for ptp_clock */ struct mutex ptp_lock; + struct ptp_pin_desc *pin_config; }; struct kszphy_priv { @@ -3658,6 +3659,77 @@ static int lan8841_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? 
-EFAULT : 0; } +#define LAN8841_EVENT_A 0 +#define LAN8841_EVENT_B 1 +#define LAN8841_PTP_LTC_TARGET_SEC_HI(event) ((event) == LAN8841_EVENT_A ? 278 : 288) +#define LAN8841_PTP_LTC_TARGET_SEC_LO(event) ((event) == LAN8841_EVENT_A ? 279 : 289) +#define LAN8841_PTP_LTC_TARGET_NS_HI(event) ((event) == LAN8841_EVENT_A ? 280 : 290) +#define LAN8841_PTP_LTC_TARGET_NS_LO(event) ((event) == LAN8841_EVENT_A ? 281 : 291) + +static int lan8841_ptp_set_target(struct kszphy_ptp_priv *ptp_priv, u8 event, + s64 sec, u32 nsec) +{ + struct phy_device *phydev = ptp_priv->phydev; + int ret; + + ret = phy_write_mmd(phydev, 2, LAN8841_PTP_LTC_TARGET_SEC_HI(event), + upper_16_bits(sec)); + if (ret) + return ret; + + ret = phy_write_mmd(phydev, 2, LAN8841_PTP_LTC_TARGET_SEC_LO(event), + lower_16_bits(sec)); + if (ret) + return ret; + + ret = phy_write_mmd(phydev, 2, LAN8841_PTP_LTC_TARGET_NS_HI(event) & 0x3fff, + upper_16_bits(nsec)); + if (ret) + return ret; + + return phy_write_mmd(phydev, 2, LAN8841_PTP_LTC_TARGET_NS_LO(event), + lower_16_bits(nsec)); +} + +#define LAN8841_BUFFER_TIME 2 + +static int lan8841_ptp_update_target(struct kszphy_ptp_priv *ptp_priv, + const struct timespec64 *ts) +{ + return lan8841_ptp_set_target(ptp_priv, LAN8841_EVENT_A, + ts->tv_sec + LAN8841_BUFFER_TIME, ts->tv_nsec); +} + +#define LAN8841_PTP_LTC_TARGET_RELOAD_SEC_HI(event) ((event) == LAN8841_EVENT_A ? 282 : 292) +#define LAN8841_PTP_LTC_TARGET_RELOAD_SEC_LO(event) ((event) == LAN8841_EVENT_A ? 283 : 293) +#define LAN8841_PTP_LTC_TARGET_RELOAD_NS_HI(event) ((event) == LAN8841_EVENT_A ? 284 : 294) +#define LAN8841_PTP_LTC_TARGET_RELOAD_NS_LO(event) ((event) == LAN8841_EVENT_A ? 285 : 295) + +static int lan8841_ptp_set_reload(struct kszphy_ptp_priv *ptp_priv, u8 event, + s64 sec, u32 nsec) +{ + struct phy_device *phydev = ptp_priv->phydev; + int ret; + + ret = phy_write_mmd(phydev, 2, LAN8841_PTP_LTC_TARGET_RELOAD_SEC_HI(event), + upper_16_bits(sec)); + if (ret) + return ret; + + ret = phy_write_mmd(phydev, 2, LAN8841_PTP_LTC_TARGET_RELOAD_SEC_LO(event), + lower_16_bits(sec)); + if (ret) + return ret; + + ret = phy_write_mmd(phydev, 2, LAN8841_PTP_LTC_TARGET_RELOAD_NS_HI(event) & 0x3fff, + upper_16_bits(nsec)); + if (ret) + return ret; + + return phy_write_mmd(phydev, 2, LAN8841_PTP_LTC_TARGET_RELOAD_NS_LO(event), + lower_16_bits(nsec)); +} + #define LAN8841_PTP_LTC_SET_SEC_HI 262 #define LAN8841_PTP_LTC_SET_SEC_MID 263 #define LAN8841_PTP_LTC_SET_SEC_LO 264 @@ -3671,6 +3743,7 @@ static int lan8841_ptp_settime64(struct ptp_clock_info *ptp, struct kszphy_ptp_priv *ptp_priv = container_of(ptp, struct kszphy_ptp_priv, ptp_clock_info); struct phy_device *phydev = ptp_priv->phydev; + int ret; /* Set the value to be stored */ mutex_lock(&ptp_priv->ptp_lock); @@ -3683,9 +3756,10 @@ static int lan8841_ptp_settime64(struct ptp_clock_info *ptp, /* Set the command to load the LTC */ phy_write_mmd(phydev, 2, LAN8841_PTP_CMD_CTL, LAN8841_PTP_CMD_CTL_PTP_LTC_LOAD); + ret = lan8841_ptp_update_target(ptp_priv, ts); mutex_unlock(&ptp_priv->ptp_lock); - return 0; + return ret; } #define LAN8841_PTP_LTC_RD_SEC_HI 358 @@ -3740,6 +3814,7 @@ static int lan8841_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) bool add = true; u32 nsec; s32 sec; + int ret; /* The HW allows up to 15 sec to adjust the time, but here we limit to * 10 sec the adjustment. 
The reason is, in case the adjustment is 14 @@ -3803,7 +3878,13 @@ static int lan8841_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) } mutex_unlock(&ptp_priv->ptp_lock); - return 0; + /* Update the target clock */ + ptp->gettime64(ptp, &ts); + mutex_lock(&ptp_priv->ptp_lock); + ret = lan8841_ptp_update_target(ptp_priv, &ts); + mutex_unlock(&ptp_priv->ptp_lock); + + return ret; } #define LAN8841_PTP_LTC_RATE_ADJ_HI 269 @@ -3839,6 +3920,292 @@ static int lan8841_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) return 0; } +static int lan8841_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin, + enum ptp_pin_function func, unsigned int chan) +{ + switch (func) { + case PTP_PF_NONE: + case PTP_PF_PEROUT: + break; + default: + return -1; + } + + return 0; +} + +#define LAN8841_PTP_GPIO_NUM 10 +#define LAN8841_GPIO_EN 128 +#define LAN8841_GPIO_DIR 129 +#define LAN8841_GPIO_BUF 130 + +static int lan8841_ptp_perout_off(struct kszphy_ptp_priv *ptp_priv, int pin) +{ + struct phy_device *phydev = ptp_priv->phydev; + int ret; + + ret = phy_clear_bits_mmd(phydev, 2, LAN8841_GPIO_EN, BIT(pin)); + if (ret) + return ret; + + ret = phy_clear_bits_mmd(phydev, 2, LAN8841_GPIO_DIR, BIT(pin)); + if (ret) + return ret; + + return phy_clear_bits_mmd(phydev, 2, LAN8841_GPIO_BUF, BIT(pin)); +} + +static int lan8841_ptp_perout_on(struct kszphy_ptp_priv *ptp_priv, int pin) +{ + struct phy_device *phydev = ptp_priv->phydev; + int ret; + + ret = phy_set_bits_mmd(phydev, 2, LAN8841_GPIO_EN, BIT(pin)); + if (ret) + return ret; + + ret = phy_set_bits_mmd(phydev, 2, LAN8841_GPIO_DIR, BIT(pin)); + if (ret) + return ret; + + return phy_set_bits_mmd(phydev, 2, LAN8841_GPIO_BUF, BIT(pin)); +} + +#define LAN8841_GPIO_DATA_SEL1 131 +#define LAN8841_GPIO_DATA_SEL2 132 +#define LAN8841_GPIO_DATA_SEL_GPIO_DATA_SEL_EVENT_MASK GENMASK(2, 0) +#define LAN8841_GPIO_DATA_SEL_GPIO_DATA_SEL_EVENT_A 1 +#define LAN8841_GPIO_DATA_SEL_GPIO_DATA_SEL_EVENT_B 2 +#define LAN8841_PTP_GENERAL_CONFIG 257 +#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_POL_A BIT(1) +#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_POL_B BIT(3) +#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_A_MASK GENMASK(7, 4) +#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_B_MASK GENMASK(11, 8) +#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_A 4 +#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_B 7 + +static int lan8841_ptp_remove_event(struct kszphy_ptp_priv *ptp_priv, int pin, + u8 event) +{ + struct phy_device *phydev = ptp_priv->phydev; + u16 tmp; + int ret; + + /* Now remove pin from the event. 
GPIO_DATA_SEL1 contains the GPIO + * pins 0-4 while GPIO_DATA_SEL2 contains GPIO pins 5-9, therefore + * depending on the pin, it requires to read a different register + */ + if (pin < 5) { + tmp = LAN8841_GPIO_DATA_SEL_GPIO_DATA_SEL_EVENT_MASK << (3 * pin); + ret = phy_clear_bits_mmd(phydev, 2, LAN8841_GPIO_DATA_SEL1, tmp); + } else { + tmp = LAN8841_GPIO_DATA_SEL_GPIO_DATA_SEL_EVENT_MASK << (3 * (pin - 5)); + ret = phy_clear_bits_mmd(phydev, 2, LAN8841_GPIO_DATA_SEL2, tmp); + } + if (ret) + return ret; + + /* Disable the event */ + if (event == LAN8841_EVENT_A) + tmp = LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_POL_A | + LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_A_MASK; + else + tmp = LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_POL_B | + LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_B_MASK; + return phy_clear_bits_mmd(phydev, 2, LAN8841_GPIO_EN, tmp); +} + +static int lan8841_ptp_enable_event(struct kszphy_ptp_priv *ptp_priv, int pin, + u8 event, int pulse_width) +{ + struct phy_device *phydev = ptp_priv->phydev; + u16 tmp; + int ret; + + /* Enable the event */ + if (event == LAN8841_EVENT_A) + ret = phy_modify_mmd(phydev, 2, LAN8841_PTP_GENERAL_CONFIG, + LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_POL_A | + LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_A_MASK, + LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_POL_A | + pulse_width << LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_A); + else + ret = phy_modify_mmd(phydev, 2, LAN8841_PTP_GENERAL_CONFIG, + LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_POL_B | + LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_B_MASK, + LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_POL_B | + pulse_width << LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_B); + if (ret) + return ret; + + /* Now connect the pin to the event. GPIO_DATA_SEL1 contains the GPIO + * pins 0-4 while GPIO_DATA_SEL2 contains GPIO pins 5-9, therefore + * depending on the pin, it requires to read a different register + */ + if (event == LAN8841_EVENT_A) + tmp = LAN8841_GPIO_DATA_SEL_GPIO_DATA_SEL_EVENT_A; + else + tmp = LAN8841_GPIO_DATA_SEL_GPIO_DATA_SEL_EVENT_B; + + if (pin < 5) + ret = phy_set_bits_mmd(phydev, 2, LAN8841_GPIO_DATA_SEL1, + tmp << (3 * pin)); + else + ret = phy_set_bits_mmd(phydev, 2, LAN8841_GPIO_DATA_SEL2, + tmp << (3 * (pin - 5))); + + return ret; +} + +#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_200MS 13 +#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_100MS 12 +#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_50MS 11 +#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_10MS 10 +#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_5MS 9 +#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_1MS 8 +#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_500US 7 +#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_100US 6 +#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_50US 5 +#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_10US 4 +#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_5US 3 +#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_1US 2 +#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_500NS 1 +#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_100NS 0 + +static int lan8841_ptp_perout(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, int on) +{ + struct kszphy_ptp_priv *ptp_priv = container_of(ptp, struct kszphy_ptp_priv, + ptp_clock_info); + struct phy_device *phydev = ptp_priv->phydev; + struct timespec64 ts_on, ts_period; + s64 on_nsec, period_nsec; + int pulse_width; + int pin; + int ret; + + if (rq->perout.flags & ~PTP_PEROUT_DUTY_CYCLE) + return -EOPNOTSUPP; + + pin = ptp_find_pin(ptp_priv->ptp_clock, PTP_PF_PEROUT, rq->perout.index); + if (pin == -1 || pin >= LAN8841_PTP_GPIO_NUM) + return -EINVAL; + + if (!on) { + ret 
= lan8841_ptp_perout_off(ptp_priv, pin); + if (ret) + return ret; + + return lan8841_ptp_remove_event(ptp_priv, LAN8841_EVENT_A, pin); + } + + ts_on.tv_sec = rq->perout.on.sec; + ts_on.tv_nsec = rq->perout.on.nsec; + on_nsec = timespec64_to_ns(&ts_on); + + ts_period.tv_sec = rq->perout.period.sec; + ts_period.tv_nsec = rq->perout.period.nsec; + period_nsec = timespec64_to_ns(&ts_period); + + if (period_nsec < 200) { + pr_warn_ratelimited("%s: perout period too small, minimim is 200 nsec\n", + phydev_name(phydev)); + return -EOPNOTSUPP; + } + + if (on_nsec >= period_nsec) { + pr_warn_ratelimited("%s: pulse width must be smaller than period\n", + phydev_name(phydev)); + return -EINVAL; + } + + switch (on_nsec) { + case 200000000: + pulse_width = LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_200MS; + break; + case 100000000: + pulse_width = LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_100MS; + break; + case 50000000: + pulse_width = LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_50MS; + break; + case 10000000: + pulse_width = LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_10MS; + break; + case 5000000: + pulse_width = LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_5MS; + break; + case 1000000: + pulse_width = LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_1MS; + break; + case 500000: + pulse_width = LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_500US; + break; + case 100000: + pulse_width = LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_100US; + break; + case 50000: + pulse_width = LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_50US; + break; + case 10000: + pulse_width = LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_10US; + break; + case 5000: + pulse_width = LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_5US; + break; + case 1000: + pulse_width = LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_1US; + break; + case 500: + pulse_width = LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_500NS; + break; + case 100: + pulse_width = LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_100NS; + break; + default: + pr_warn_ratelimited("%s: Use default duty cycle of 100ns\n", + phydev_name(phydev)); + pulse_width = LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_100NS; + break; + } + + mutex_lock(&ptp_priv->ptp_lock); + ret = lan8841_ptp_set_target(ptp_priv, LAN8841_EVENT_A, rq->perout.start.sec, + rq->perout.start.nsec); + mutex_unlock(&ptp_priv->ptp_lock); + if (ret) + return ret; + + ret = lan8841_ptp_set_reload(ptp_priv, LAN8841_EVENT_A, rq->perout.period.sec, + rq->perout.period.nsec); + if (ret) + return ret; + + ret = lan8841_ptp_enable_event(ptp_priv, pin, LAN8841_EVENT_A, + pulse_width); + if (ret) + return ret; + + ret = lan8841_ptp_perout_on(ptp_priv, pin); + if (ret) + lan8841_ptp_remove_event(ptp_priv, pin, LAN8841_EVENT_A); + + return ret; +} + +static int lan8841_ptp_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, int on) +{ + switch (rq->type) { + case PTP_CLK_REQ_PEROUT: + return lan8841_ptp_perout(ptp, rq, on); + default: + return -EOPNOTSUPP; + } + + return 0; +} + static struct ptp_clock_info lan8841_ptp_clock_info = { .owner = THIS_MODULE, .name = "lan8841 ptp", @@ -3847,6 +4214,10 @@ static struct ptp_clock_info lan8841_ptp_clock_info = { .settime64 = lan8841_ptp_settime64, .adjtime = lan8841_ptp_adjtime, .adjfine = lan8841_ptp_adjfine, + .verify = lan8841_ptp_verify, + .enable = lan8841_ptp_enable, + .n_per_out = LAN8841_PTP_GPIO_NUM, + .n_pins = LAN8841_PTP_GPIO_NUM, }; #define LAN8841_OPERATION_MODE_STRAP_LOW_REGISTER 3 @@ -3874,7 +4245,23 @@ static int lan8841_probe(struct phy_device *phydev) priv = phydev->priv; ptp_priv = &priv->ptp_priv; + ptp_priv->pin_config = devm_kcalloc(&phydev->mdio.dev, + 
LAN8841_PTP_GPIO_NUM, + sizeof(*ptp_priv->pin_config), + GFP_KERNEL); + if (!ptp_priv->pin_config) + return -ENOMEM; + + for (int i = 0; i < LAN8841_PTP_GPIO_NUM; ++i) { + struct ptp_pin_desc *p = &ptp_priv->pin_config[i]; + + snprintf(p->name, sizeof(p->name), "pin%d", i); + p->index = i; + p->func = PTP_PF_NONE; + } + ptp_priv->ptp_clock_info = lan8841_ptp_clock_info; + ptp_priv->ptp_clock_info.pin_config = ptp_priv->pin_config; ptp_priv->ptp_clock = ptp_clock_register(&ptp_priv->ptp_clock_info, &phydev->mdio.dev); if (IS_ERR(ptp_priv->ptp_clock)) { -- GitLab From c5a8027de26e1b7c0ba4f8e430165b6a4a29a869 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Fri, 10 Mar 2023 08:45:00 +0100 Subject: [PATCH 0398/2078] net: phy: dp83867: Disable IRQs on suspend Before putting the PHY into IEEE power down mode, disable IRQs to prevent accessing the PHY once MDIO has already been shutdown. Signed-off-by: Alexander Stein Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20230310074500.3472858-1-alexander.stein@ew.tq-group.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/dp83867.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index 89cd821f1f466..5821f04c69dcf 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -693,6 +693,30 @@ static int dp83867_of_init(struct phy_device *phydev) } #endif /* CONFIG_OF_MDIO */ +static int dp83867_suspend(struct phy_device *phydev) +{ + /* Disable PHY Interrupts */ + if (phy_interrupt_is_valid(phydev)) { + phydev->interrupts = PHY_INTERRUPT_DISABLED; + dp83867_config_intr(phydev); + } + + return genphy_suspend(phydev); +} + +static int dp83867_resume(struct phy_device *phydev) +{ + /* Enable PHY Interrupts */ + if (phy_interrupt_is_valid(phydev)) { + phydev->interrupts = PHY_INTERRUPT_ENABLED; + dp83867_config_intr(phydev); + } + + genphy_resume(phydev); + + return 0; +} + static int dp83867_probe(struct phy_device *phydev) { struct dp83867_private *dp83867; @@ -968,8 +992,8 @@ static struct phy_driver dp83867_driver[] = { .config_intr = dp83867_config_intr, .handle_interrupt = dp83867_handle_interrupt, - .suspend = genphy_suspend, - .resume = genphy_resume, + .suspend = dp83867_suspend, + .resume = dp83867_resume, .link_change_notify = dp83867_link_change_notify, .set_loopback = dp83867_loopback, -- GitLab From ad4bf5f2406f6a2e29266bbad74e18f0d955ac4c Mon Sep 17 00:00:00 2001 From: Vincenzo Palazzo Date: Fri, 10 Mar 2023 23:18:51 +0100 Subject: [PATCH 0399/2078] net: socket: suppress unused warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit suppress unused warnings and fix the error that there is with the W=1 enabled. 
Warning generated net/socket.c: In function ‘__sys_getsockopt’: net/socket.c:2300:13: error: variable ‘max_optlen’ set but not used [-Werror=unused-but-set-variable] 2300 | int max_optlen; Signed-off-by: Vincenzo Palazzo Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20230310221851.304657-1-vincenzopalazzodev@gmail.com Signed-off-by: Jakub Kicinski --- net/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/socket.c b/net/socket.c index 9c92c0e6c4da8..73e493da45896 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2292,9 +2292,9 @@ INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level, int __sys_getsockopt(int fd, int level, int optname, char __user *optval, int __user *optlen) { + int max_optlen __maybe_unused; int err, fput_needed; struct socket *sock; - int max_optlen; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) -- GitLab From 90c7dd32652bcfcc7716c762bc095a7d49ad85dd Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 8 Mar 2023 21:34:13 +0100 Subject: [PATCH 0400/2078] net: phy: smsc: use device_property_present in smsc_phy_probe Use unified device property API. Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/a969f012-1d3b-7a36-51cf-89a5f8f15a9b@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/smsc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index ac951d67edcc4..9cfaccce1241c 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -269,7 +269,6 @@ static void smsc_get_stats(struct phy_device *phydev, static int smsc_phy_probe(struct phy_device *phydev) { struct device *dev = &phydev->mdio.dev; - struct device_node *of_node = dev->of_node; struct smsc_phy_priv *priv; struct clk *refclk; @@ -279,7 +278,7 @@ static int smsc_phy_probe(struct phy_device *phydev) priv->energy_enable = true; - if (of_property_read_bool(of_node, "smsc,disable-energy-detect")) + if (device_property_present(dev, "smsc,disable-energy-detect")) priv->energy_enable = false; phydev->priv = priv; -- GitLab From 4821c186b9c343760a293b455b603735291a7865 Mon Sep 17 00:00:00 2001 From: Kristian Overskeid Date: Thu, 9 Mar 2023 10:23:02 +0100 Subject: [PATCH 0401/2078] net: hsr: Don't log netdev_err message on unknown prp dst node If no frames has been exchanged with a node for HSR_NODE_FORGET_TIME, the node will be deleted from the node_db list. If a frame is sent to the node after it is deleted, a netdev_err message for each slave interface is produced. This should not happen with dan nodes because of supervision frames, but can happen often with san nodes, which clutters the kernel log. Since the hsr protocol does not support sans, this is only relevant for the prp protocol. 
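As a rough sketch of the resulting behaviour (identifiers taken from the one-line change in the diff below; illustrative only, not a separate change), the unknown-destination warning in hsr_addr_subst_dest() is simply gated on the protocol version so PRP SAN traffic no longer floods the log:

	node_dst = find_node_by_addr_A(&port->hsr->node_db, eth_hdr(skb)->h_dest);
	if (!node_dst) {
		/* SAN nodes are expected to be unknown under PRP, so stay quiet there;
		 * under HSR an unknown destination is still worth a ratelimited error.
		 */
		if (port->hsr->prot_version != PRP_V1 && net_ratelimit())
			netdev_err(skb->dev, "%s: Unknown node\n", __func__);
		return;
	}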
Signed-off-by: Kristian Overskeid Link: https://lore.kernel.org/r/20230309092302.179586-1-koverskeid@gmail.com Signed-off-by: Jakub Kicinski --- net/hsr/hsr_framereg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 00db74d96583d..b77f1189d19d1 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -415,7 +415,7 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb, node_dst = find_node_by_addr_A(&port->hsr->node_db, eth_hdr(skb)->h_dest); if (!node_dst) { - if (net_ratelimit()) + if (port->hsr->prot_version != PRP_V1 && net_ratelimit()) netdev_err(skb->dev, "%s: Unknown node\n", __func__); return; } -- GitLab From be50da3e9d4ad1958f7b11322d44d94d5c25a4c1 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 9 Mar 2023 10:45:59 +0100 Subject: [PATCH 0402/2078] net: virtio_net: implement exact header length guest feature Virtio spec introduced a feature VIRTIO_NET_F_GUEST_HDRLEN which when set implicates that device benefits from knowing the exact size of the header. For compatibility, to signal to the device that the header is reliable driver also needs to set this feature. Without this feature set by driver, device has to figure out the header size itself. Quoting the original virtio spec: "hdr_len is a hint to the device as to how much of the header needs to be kept to copy into each packet" "a hint" might not be clear for the reader what does it mean, if it is "maybe like that" of "exactly like that". This feature just makes it crystal clear and let the device count on the hdr_len being filled up by the exact length of header. Also note the spec already has following note about hdr_len: "Due to various bugs in implementations, this field is not useful as a guarantee of the transport header size." Without this feature the device needs to parse the header in core data path handling. Accurate information helps the device to eliminate such header parsing and directly use the hardware accelerators for GSO operation. virtio_net_hdr_from_skb() fills up hdr_len to skb_headlen(skb). The driver already complies to fill the correct value. Introduce the feature and advertise it. Note that virtio spec also includes following note for device implementation: "Caution should be taken by the implementation so as to prevent a malicious driver from attacking the device by setting an incorrect hdr_len." There is a plan to support this feature in our emulated device. A device of SolidRun offers this feature bit. They claim this feature will save the device a few cycles for every GSO packet. Link: https://docs.oasis-open.org/virtio/virtio/v1.2/cs01/virtio-v1.2-cs01.html#x1-230006x3 Signed-off-by: Jiri Pirko Reviewed-by: Parav Pandit Reviewed-by: Alvaro Karsz Acked-by: Michael S. 
Tsirkin Acked-by: Willem de Bruijn Link: https://lore.kernel.org/r/20230309094559.917857-1-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 6 ++++-- include/uapi/linux/virtio_net.h | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index fb5e68ed3ec27..e85b03988733e 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -62,7 +62,8 @@ static const unsigned long guest_offloads[] = { VIRTIO_NET_F_GUEST_UFO, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_USO4, - VIRTIO_NET_F_GUEST_USO6 + VIRTIO_NET_F_GUEST_USO6, + VIRTIO_NET_F_GUEST_HDRLEN }; #define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ @@ -4213,7 +4214,8 @@ static struct virtio_device_id id_table[] = { VIRTIO_NET_F_CTRL_MAC_ADDR, \ VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ - VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL + VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \ + VIRTIO_NET_F_GUEST_HDRLEN static unsigned int features[] = { VIRTNET_FEATURES, diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index b4062bed186a6..12c1c9699935e 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -61,6 +61,7 @@ #define VIRTIO_NET_F_GUEST_USO6 55 /* Guest can handle USOv6 in. */ #define VIRTIO_NET_F_HOST_USO 56 /* Host can handle USO in. */ #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ +#define VIRTIO_NET_F_GUEST_HDRLEN 59 /* Guest provides the exact hdr_len value. */ #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ #define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */ #define VIRTIO_NET_F_STANDBY 62 /* Act as standby for another device -- GitLab From 7c6dddc239abe660598c49ec95ea0ed6399a4b2a Mon Sep 17 00:00:00 2001 From: Maxim Korotkov Date: Thu, 9 Mar 2023 20:43:47 +0300 Subject: [PATCH 0403/2078] bnxt: avoid overflow in bnxt_get_nvram_directory() The value of an arithmetic expression is subject of possible overflow due to a failure to cast operands to a larger data type before performing arithmetic. Used macro for multiplication instead operator for avoiding overflow. Found by Security Code and Linux Verification Center (linuxtesting.org) with SVACE. Signed-off-by: Maxim Korotkov Reviewed-by: Pavan Chebbi Link: https://lore.kernel.org/r/20230309174347.3515-1-korotkov.maxim.s@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index ec573127b7076..696f32dfe41fe 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -2862,7 +2862,7 @@ static int bnxt_get_nvram_directory(struct net_device *dev, u32 len, u8 *data) if (rc) return rc; - buflen = dir_entries * entry_length; + buflen = mul_u32_u32(dir_entries, entry_length); buf = hwrm_req_dma_slice(bp, req, buflen, &dma_handle); if (!buf) { hwrm_req_drop(bp, req); -- GitLab From c66b2111c9c952f3bbf454a755768e80308cc6e2 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Thu, 9 Mar 2023 14:55:54 -0300 Subject: [PATCH 0404/2078] selftests: tc-testing: add tests for action binding Add tests that check if filters can bind actions, that is create an action independently and then bind to a filter. 
tdc-tests under category 'infra': 1..18 ok 1 abdc - Reference pedit action object in filter ok 2 7a70 - Reference mpls action object in filter ok 3 d241 - Reference bpf action object in filter ok 4 383a - Reference connmark action object in filter ok 5 c619 - Reference csum action object in filter ok 6 a93d - Reference ct action object in filter ok 7 8bb5 - Reference ctinfo action object in filter ok 8 2241 - Reference gact action object in filter ok 9 35e9 - Reference gate action object in filter ok 10 b22e - Reference ife action object in filter ok 11 ef74 - Reference mirred action object in filter ok 12 2c81 - Reference nat action object in filter ok 13 ac9d - Reference police action object in filter ok 14 68be - Reference sample action object in filter ok 15 cf01 - Reference skbedit action object in filter ok 16 c109 - Reference skbmod action object in filter ok 17 4abc - Reference tunnel_key action object in filter ok 18 dadd - Reference vlan action object in filter Reviewed-by: Jamal Hadi Salim Signed-off-by: Pedro Tammela Link: https://lore.kernel.org/r/20230309175554.304824-1-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/infra/actions.json | 416 ++++++++++++++++++ 1 file changed, 416 insertions(+) create mode 100644 tools/testing/selftests/tc-testing/tc-tests/infra/actions.json diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json b/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json new file mode 100644 index 0000000000000..16f3a83605e4d --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json @@ -0,0 +1,416 @@ +[ + { + "id": "abdc", + "name": "Reference pedit action object in filter", + "category": [ + "infra", + "pedit" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC action add action pedit munge offset 0 u8 clear index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action pedit index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action pedit" + ] + }, + { + "id": "7a70", + "name": "Reference mpls action object in filter", + "category": [ + "infra", + "mpls" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC action add action mpls pop protocol ipv4 index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action mpls index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action mpls" + ] + }, + { + "id": "d241", + "name": "Reference bpf action object in filter", + "category": [ + "infra", + "bpf" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 
1 protocol ip matchall action bpf index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action bpf" + ] + }, + { + "id": "383a", + "name": "Reference connmark action object in filter", + "category": [ + "infra", + "connmark" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action connmark" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action connmark index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action connmark" + ] + }, + { + "id": "c619", + "name": "Reference csum action object in filter", + "category": [ + "infra", + "csum" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action csum ip4h index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action csum index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action csum" + ] + }, + { + "id": "a93d", + "name": "Reference ct action object in filter", + "category": [ + "infra", + "ct" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action ct index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action ct index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action ct" + ] + }, + { + "id": "8bb5", + "name": "Reference ctinfo action object in filter", + "category": [ + "infra", + "ctinfo" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC action add action ctinfo index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action ctinfo index 10", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action ctinfo" + ] + }, + { + "id": "2241", + "name": "Reference gact action object in filter", + "category": [ + "infra", + "gact" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + 
"$TC qdisc add dev $DUMMY ingress", + "$TC actions add action pass index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action gact index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action gact" + ] + }, + { + "id": "35e9", + "name": "Reference gate action object in filter", + "category": [ + "infra", + "gate" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC action add action gate priority 1 sched-entry close 100000000ns index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action gate index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action gate" + ] + }, + { + "id": "b22e", + "name": "Reference ife action object in filter", + "category": [ + "infra", + "ife" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action ife encode allow mark pass index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action ife index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action ife" + ] + }, + { + "id": "ef74", + "name": "Reference mirred action object in filter", + "category": [ + "infra", + "mirred" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action mirred egress mirror index 1 dev lo" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action mirred index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action mirred" + ] + }, + { + "id": "2c81", + "name": "Reference nat action object in filter", + "category": [ + "infra", + "nat" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action nat ingress 192.168.1.1 200.200.200.1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action nat index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link 
del dev $DUMMY type dummy", + "$TC actions flush action nat" + ] + }, + { + "id": "ac9d", + "name": "Reference police action object in filter", + "category": [ + "infra", + "police" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action police rate 1kbit burst 10k index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action police index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action police" + ] + }, + { + "id": "68be", + "name": "Reference sample action object in filter", + "category": [ + "infra", + "sample" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action sample rate 10 group 1 index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action sample index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action sample" + ] + }, + { + "id": "cf01", + "name": "Reference skbedit action object in filter", + "category": [ + "infra", + "skbedit" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action skbedit mark 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action skbedit index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action skbedit" + ] + }, + { + "id": "c109", + "name": "Reference skbmod action object in filter", + "category": [ + "infra", + "skbmod" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action skbmod set dmac 11:22:33:44:55:66 index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action skbmod index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action skbmod" + ] + }, + { + "id": "4abc", + "name": "Reference tunnel_key action object in filter", + "category": [ + "infra", + "tunnel_key" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 id 1 index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip 
matchall action tunnel_key index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action tunnel_key" + ] + }, + { + "id": "dadd", + "name": "Reference vlan action object in filter", + "category": [ + "infra", + "tunnel_key" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action vlan pop pipe index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action vlan index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action vlan" + ] + } +] -- GitLab From 9e36a204bd43553a9cd4bd574612cd9a5df791ea Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Mon, 13 Mar 2023 14:46:41 -0700 Subject: [PATCH 0405/2078] bpf: Disable migration when freeing stashed local kptr using obj drop When a local kptr is stashed in a map and freed when the map goes away, currently an error like the below appears: [ 39.195695] BUG: using smp_processor_id() in preemptible [00000000] code: kworker/u32:15/2875 [ 39.196549] caller is bpf_mem_free+0x56/0xc0 [ 39.196958] CPU: 15 PID: 2875 Comm: kworker/u32:15 Tainted: G O 6.2.0-13016-g22df776a9a86 #4477 [ 39.197897] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014 [ 39.198949] Workqueue: events_unbound bpf_map_free_deferred [ 39.199470] Call Trace: [ 39.199703] [ 39.199911] dump_stack_lvl+0x60/0x70 [ 39.200267] check_preemption_disabled+0xbf/0xe0 [ 39.200704] bpf_mem_free+0x56/0xc0 [ 39.201032] ? bpf_obj_new_impl+0xa0/0xa0 [ 39.201430] bpf_obj_free_fields+0x1cd/0x200 [ 39.201838] array_map_free+0xad/0x220 [ 39.202193] ? finish_task_switch+0xe5/0x3c0 [ 39.202614] bpf_map_free_deferred+0xea/0x210 [ 39.203006] ? lockdep_hardirqs_on_prepare+0xe/0x220 [ 39.203460] process_one_work+0x64f/0xbe0 [ 39.203822] ? pwq_dec_nr_in_flight+0x110/0x110 [ 39.204264] ? do_raw_spin_lock+0x107/0x1c0 [ 39.204662] ? lockdep_hardirqs_on_prepare+0xe/0x220 [ 39.205107] worker_thread+0x74/0x7a0 [ 39.205451] ? process_one_work+0xbe0/0xbe0 [ 39.205818] kthread+0x171/0x1a0 [ 39.206111] ? kthread_complete_and_exit+0x20/0x20 [ 39.206552] ret_from_fork+0x1f/0x30 [ 39.206886] This happens because the call to __bpf_obj_drop_impl I added in the patch adding support for stashing local kptrs doesn't disable migration. Prior to that patch, __bpf_obj_drop_impl logic only ran when called by a BPF progarm, whereas now it can be called from map free path, so it's necessary to explicitly disable migration. Also, refactor a bit to just call __bpf_obj_drop_impl directly instead of bothering w/ dtor union and setting pointer-to-obj_drop. 
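For readers unfamiliar with the constraint, a minimal sketch of the pattern (assuming only, as the splat above shows, that __bpf_obj_drop_impl() ultimately frees through bpf_mem_free(), which indexes per-CPU caches via smp_processor_id() and therefore must not race with CPU migration):

	/* The map-free path runs in process context (a workqueue), so pin the
	 * task to its current CPU around the per-CPU allocator call.
	 */
	migrate_disable();
	__bpf_obj_drop_impl(xchgd_field, pointee_struct_meta ?
					 pointee_struct_meta->record : NULL);
	migrate_enable();

This mirrors what BPF program context already provides implicitly, since programs execute with migration disabled.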
Fixes: c8e187540914 ("bpf: Support __kptr to local kptrs") Reported-by: Alexei Starovoitov Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20230313214641.3731908-1-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 12 ++++-------- kernel/bpf/btf.c | 4 +--- kernel/bpf/syscall.c | 10 +++++++--- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 756b85f0d0d38..71cc92a4ba487 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -190,18 +190,14 @@ enum btf_field_type { }; typedef void (*btf_dtor_kfunc_t)(void *); -typedef void (*btf_dtor_obj_drop)(void *, const struct btf_record *); struct btf_field_kptr { struct btf *btf; struct module *module; - union { - /* dtor used if btf_is_kernel(btf), otherwise the type - * is program-allocated and obj_drop is used - */ - btf_dtor_kfunc_t dtor; - btf_dtor_obj_drop obj_drop; - }; + /* dtor used if btf_is_kernel(btf), otherwise the type is + * program-allocated, dtor is NULL, and __bpf_obj_drop_impl is used + */ + btf_dtor_kfunc_t dtor; u32 btf_id; }; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 66fad7a16b6cf..b7e5a5510b910 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3551,8 +3551,6 @@ static int btf_find_field(const struct btf *btf, const struct btf_type *t, return -EINVAL; } -extern void __bpf_obj_drop_impl(void *p, const struct btf_record *rec); - static int btf_parse_kptr(const struct btf *btf, struct btf_field *field, struct btf_field_info *info) { @@ -3578,7 +3576,7 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field, /* Type exists only in program BTF. Assume that it's a MEM_ALLOC * kptr allocated via bpf_obj_new */ - field->kptr.dtor = (void *)&__bpf_obj_drop_impl; + field->kptr.dtor = NULL; id = info->kptr.type_id; kptr_btf = (struct btf *)btf; btf_get(kptr_btf); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0684febc447a8..5b88301a2ae02 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -650,6 +650,8 @@ void bpf_obj_free_timer(const struct btf_record *rec, void *obj) bpf_timer_cancel_and_free(obj + rec->timer_off); } +extern void __bpf_obj_drop_impl(void *p, const struct btf_record *rec); + void bpf_obj_free_fields(const struct btf_record *rec, void *obj) { const struct btf_field *fields; @@ -679,9 +681,11 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj) pointee_struct_meta = btf_find_struct_meta(field->kptr.btf, field->kptr.btf_id); WARN_ON_ONCE(!pointee_struct_meta); - field->kptr.obj_drop(xchgd_field, pointee_struct_meta ? - pointee_struct_meta->record : - NULL); + migrate_disable(); + __bpf_obj_drop_impl(xchgd_field, pointee_struct_meta ? + pointee_struct_meta->record : + NULL); + migrate_enable(); } else { field->kptr.dtor(xchgd_field); } -- GitLab From bcc858689db5f2e5a8d4d6e8bc5bb9736cd80626 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 10 Mar 2023 08:47:16 -0600 Subject: [PATCH 0406/2078] net: Use of_property_present() for testing DT property presence It is preferred to use typed property access functions (i.e. of_property_read_ functions) rather than low-level of_get_property/of_find_property functions for reading properties. As part of this, convert of_get_property/of_find_property calls to the recently added of_property_present() helper when we just want to test for presence of a property and nothing more. 
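As a hedged illustration of the conversion (hypothetical property name; the real call sites are in the diff below), a presence-only check goes from returning a struct property pointer to a plain boolean:

	/* before: abuses the lookup helper just to test for existence */
	if (!of_find_property(np, "vendor,some-flag", NULL))
		return 0;

	/* after: intent is explicit and no struct property pointer leaks out */
	if (!of_property_present(np, "vendor,some-flag"))
		return 0;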
Signed-off-by: Rob Herring Reviewed-by: Simon Horman Acked-by: Kalle Valo Link: https://lore.kernel.org/r/20230310144716.1544083-1-robh@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/mdio/of_mdio.c | 4 ++-- drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/mdio/of_mdio.c b/drivers/net/mdio/of_mdio.c index 510822d6d0d90..bf10d0688eea2 100644 --- a/drivers/net/mdio/of_mdio.c +++ b/drivers/net/mdio/of_mdio.c @@ -131,7 +131,7 @@ bool of_mdiobus_child_is_phy(struct device_node *child) return true; } - if (!of_find_property(child, "compatible", NULL)) + if (!of_property_present(child, "compatible")) return true; return false; @@ -203,7 +203,7 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) /* auto scan for PHYs with empty reg property */ for_each_available_child_of_node(np, child) { /* Skip PHYs with reg property set */ - if (of_find_property(child, "reg", NULL)) + if (of_property_present(child, "reg")) continue; for (addr = 0; addr < PHY_MAX_ADDR; addr++) { diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c index 52527b61341ed..e406e11481a62 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c @@ -129,7 +129,7 @@ void brcmf_of_probe(struct device *dev, enum brcmf_bus_type bus_type, sdio->drive_strength = val; /* make sure there are interrupts defined in the node */ - if (!of_find_property(np, "interrupts", NULL)) + if (!of_property_present(np, "interrupts")) return; irq = irq_of_parse_and_map(np, 0); -- GitLab From 27d7fdf06fdb84455ff585b58c8034e2fab42583 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Mon, 13 Mar 2023 14:56:27 -0600 Subject: [PATCH 0407/2078] bpf: use canonical ftrace path The canonical location for the tracefs filesystem is at /sys/kernel/tracing. But, from Documentation/trace/ftrace.rst: Before 4.1, all ftrace tracing control files were within the debugfs file system, which is typically located at /sys/kernel/debug/tracing. For backward compatibility, when mounting the debugfs file system, the tracefs file system will be automatically mounted at: /sys/kernel/debug/tracing Many comments and samples in the bpf code still refer to this older debugfs path, so let's update them to avoid confusion. There are a few spots where the bpf code explicitly checks both tracefs and debugfs (tools/bpf/bpftool/tracelog.c and tools/lib/api/fs/fs.c) and I've left those alone so that the tools can continue to work with both paths. Signed-off-by: Ross Zwisler Acked-by: Michael S. Tsirkin Reviewed-by: Steven Rostedt (Google) Link: https://lore.kernel.org/r/20230313205628.1058720-2-zwisler@kernel.org Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 8 ++++---- samples/bpf/cpustat_kern.c | 4 ++-- samples/bpf/hbm.c | 4 ++-- samples/bpf/ibumad_kern.c | 4 ++-- samples/bpf/lwt_len_hist.sh | 2 +- samples/bpf/offwaketime_kern.c | 2 +- samples/bpf/task_fd_query_user.c | 4 ++-- samples/bpf/test_lwt_bpf.sh | 2 +- samples/bpf/test_overhead_tp.bpf.c | 4 ++-- tools/include/uapi/linux/bpf.h | 8 ++++---- 10 files changed, 21 insertions(+), 21 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d8c534e05b0a9..13129df937cde 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1647,17 +1647,17 @@ union bpf_attr { * Description * This helper is a "printk()-like" facility for debugging. 
It * prints a message defined by format *fmt* (of size *fmt_size*) - * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if + * to file *\/sys/kernel/tracing/trace* from TraceFS, if * available. It can take up to three additional **u64** * arguments (as an eBPF helpers, the total number of arguments is * limited to five). * * Each time the helper is called, it appends a line to the trace. - * Lines are discarded while *\/sys/kernel/debug/tracing/trace* is - * open, use *\/sys/kernel/debug/tracing/trace_pipe* to avoid this. + * Lines are discarded while *\/sys/kernel/tracing/trace* is + * open, use *\/sys/kernel/tracing/trace_pipe* to avoid this. * The format of the trace is customizable, and the exact output * one will get depends on the options set in - * *\/sys/kernel/debug/tracing/trace_options* (see also the + * *\/sys/kernel/tracing/trace_options* (see also the * *README* file under the same directory). However, it usually * defaults to something like: * diff --git a/samples/bpf/cpustat_kern.c b/samples/bpf/cpustat_kern.c index 5aefd19cdfa1a..944f13fe164a8 100644 --- a/samples/bpf/cpustat_kern.c +++ b/samples/bpf/cpustat_kern.c @@ -76,8 +76,8 @@ struct { /* * The trace events for cpu_idle and cpu_frequency are taken from: - * /sys/kernel/debug/tracing/events/power/cpu_idle/format - * /sys/kernel/debug/tracing/events/power/cpu_frequency/format + * /sys/kernel/tracing/events/power/cpu_idle/format + * /sys/kernel/tracing/events/power/cpu_frequency/format * * These two events have same format, so define one common structure. */ diff --git a/samples/bpf/hbm.c b/samples/bpf/hbm.c index 516fbac28b716..ff58ec43f56a2 100644 --- a/samples/bpf/hbm.c +++ b/samples/bpf/hbm.c @@ -65,7 +65,7 @@ static void Usage(void); static void read_trace_pipe2(void); static void do_error(char *msg, bool errno_flag); -#define DEBUGFS "/sys/kernel/debug/tracing/" +#define TRACEFS "/sys/kernel/tracing/" static struct bpf_program *bpf_prog; static struct bpf_object *obj; @@ -77,7 +77,7 @@ static void read_trace_pipe2(void) FILE *outf; char *outFname = "hbm_out.log"; - trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0); + trace_fd = open(TRACEFS "trace_pipe", O_RDONLY, 0); if (trace_fd < 0) { printf("Error opening trace_pipe\n"); return; diff --git a/samples/bpf/ibumad_kern.c b/samples/bpf/ibumad_kern.c index 9b193231024a6..f07474c725252 100644 --- a/samples/bpf/ibumad_kern.c +++ b/samples/bpf/ibumad_kern.c @@ -39,8 +39,8 @@ struct { /* Taken from the current format defined in * include/trace/events/ib_umad.h * and - * /sys/kernel/debug/tracing/events/ib_umad/ib_umad_read/format - * /sys/kernel/debug/tracing/events/ib_umad/ib_umad_write/format + * /sys/kernel/tracing/events/ib_umad/ib_umad_read/format + * /sys/kernel/tracing/events/ib_umad/ib_umad_write/format */ struct ib_umad_rw_args { u64 pad; diff --git a/samples/bpf/lwt_len_hist.sh b/samples/bpf/lwt_len_hist.sh index 7078bfcc4f4d7..381b2c6347845 100755 --- a/samples/bpf/lwt_len_hist.sh +++ b/samples/bpf/lwt_len_hist.sh @@ -5,7 +5,7 @@ NS1=lwt_ns1 VETH0=tst_lwt1a VETH1=tst_lwt1b BPF_PROG=lwt_len_hist.bpf.o -TRACE_ROOT=/sys/kernel/debug/tracing +TRACE_ROOT=/sys/kernel/tracing function cleanup { # To reset saved histogram, remove pinned map diff --git a/samples/bpf/offwaketime_kern.c b/samples/bpf/offwaketime_kern.c index eb4d94742e6b4..23f12b47e9e57 100644 --- a/samples/bpf/offwaketime_kern.c +++ b/samples/bpf/offwaketime_kern.c @@ -110,7 +110,7 @@ static inline int update_counts(void *ctx, u32 pid, u64 delta) } #if 1 -/* taken from 
/sys/kernel/debug/tracing/events/sched/sched_switch/format */ +/* taken from /sys/kernel/tracing/events/sched/sched_switch/format */ struct sched_switch_args { unsigned long long pad; char prev_comm[TASK_COMM_LEN]; diff --git a/samples/bpf/task_fd_query_user.c b/samples/bpf/task_fd_query_user.c index a33d74bd3a4b7..1e61f21804700 100644 --- a/samples/bpf/task_fd_query_user.c +++ b/samples/bpf/task_fd_query_user.c @@ -235,7 +235,7 @@ static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return) struct bpf_link *link; ssize_t bytes; - snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", + snprintf(buf, sizeof(buf), "/sys/kernel/tracing/%s_events", event_type); kfd = open(buf, O_WRONLY | O_TRUNC, 0); CHECK_PERROR_RET(kfd < 0); @@ -252,7 +252,7 @@ static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return) close(kfd); kfd = -1; - snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s/id", + snprintf(buf, sizeof(buf), "/sys/kernel/tracing/events/%ss/%s/id", event_type, event_alias); efd = open(buf, O_RDONLY, 0); CHECK_PERROR_RET(efd < 0); diff --git a/samples/bpf/test_lwt_bpf.sh b/samples/bpf/test_lwt_bpf.sh index 2e9f5126963b6..0bf2d0f6bf4ba 100755 --- a/samples/bpf/test_lwt_bpf.sh +++ b/samples/bpf/test_lwt_bpf.sh @@ -21,7 +21,7 @@ IP_LOCAL="192.168.99.1" PROG_SRC="test_lwt_bpf.c" BPF_PROG="test_lwt_bpf.o" -TRACE_ROOT=/sys/kernel/debug/tracing +TRACE_ROOT=/sys/kernel/tracing CONTEXT_INFO=$(cat ${TRACE_ROOT}/trace_options | grep context) function lookup_mac() diff --git a/samples/bpf/test_overhead_tp.bpf.c b/samples/bpf/test_overhead_tp.bpf.c index 67cab38819698..8b498328e9618 100644 --- a/samples/bpf/test_overhead_tp.bpf.c +++ b/samples/bpf/test_overhead_tp.bpf.c @@ -7,7 +7,7 @@ #include "vmlinux.h" #include -/* from /sys/kernel/debug/tracing/events/task/task_rename/format */ +/* from /sys/kernel/tracing/events/task/task_rename/format */ struct task_rename { __u64 pad; __u32 pid; @@ -21,7 +21,7 @@ int prog(struct task_rename *ctx) return 0; } -/* from /sys/kernel/debug/tracing/events/fib/fib_table_lookup/format */ +/* from /sys/kernel/tracing/events/fib/fib_table_lookup/format */ struct fib_table_lookup { __u64 pad; __u32 tb_id; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index d8c534e05b0a9..13129df937cde 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1647,17 +1647,17 @@ union bpf_attr { * Description * This helper is a "printk()-like" facility for debugging. It * prints a message defined by format *fmt* (of size *fmt_size*) - * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if + * to file *\/sys/kernel/tracing/trace* from TraceFS, if * available. It can take up to three additional **u64** * arguments (as an eBPF helpers, the total number of arguments is * limited to five). * * Each time the helper is called, it appends a line to the trace. - * Lines are discarded while *\/sys/kernel/debug/tracing/trace* is - * open, use *\/sys/kernel/debug/tracing/trace_pipe* to avoid this. + * Lines are discarded while *\/sys/kernel/tracing/trace* is + * open, use *\/sys/kernel/tracing/trace_pipe* to avoid this. * The format of the trace is customizable, and the exact output * one will get depends on the options set in - * *\/sys/kernel/debug/tracing/trace_options* (see also the + * *\/sys/kernel/tracing/trace_options* (see also the * *README* file under the same directory). 
However, it usually * defaults to something like: * -- GitLab From ab4c15feb2ebcf9f4abe31457d7cbc8f3de9c2ab Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Mon, 13 Mar 2023 14:56:28 -0600 Subject: [PATCH 0408/2078] selftests/bpf: use canonical ftrace path The canonical location for the tracefs filesystem is at /sys/kernel/tracing. But, from Documentation/trace/ftrace.rst: Before 4.1, all ftrace tracing control files were within the debugfs file system, which is typically located at /sys/kernel/debug/tracing. For backward compatibility, when mounting the debugfs file system, the tracefs file system will be automatically mounted at: /sys/kernel/debug/tracing Many tests in the bpf selftest code still refer to this older debugfs path, so let's update them to avoid confusion. Signed-off-by: Ross Zwisler Acked-by: Michael S. Tsirkin Reviewed-by: Steven Rostedt (Google) Link: https://lore.kernel.org/r/20230313205628.1058720-3-zwisler@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/get_cgroup_id_user.c | 9 +++++++-- .../selftests/bpf/prog_tests/kprobe_multi_test.c | 7 ++++++- .../selftests/bpf/prog_tests/task_fd_query_tp.c | 9 +++++++-- .../selftests/bpf/prog_tests/tp_attach_query.c | 9 +++++++-- .../testing/selftests/bpf/prog_tests/trace_printk.c | 10 +++++++--- .../selftests/bpf/prog_tests/trace_vprintk.c | 10 +++++++--- .../selftests/bpf/progs/test_stacktrace_map.c | 2 +- tools/testing/selftests/bpf/progs/test_tracepoint.c | 2 +- tools/testing/selftests/bpf/test_ftrace.sh | 7 ++++++- tools/testing/selftests/bpf/test_tunnel.sh | 13 +++++++++---- tools/testing/selftests/bpf/trace_helpers.c | 8 ++++++-- 11 files changed, 64 insertions(+), 22 deletions(-) diff --git a/tools/testing/selftests/bpf/get_cgroup_id_user.c b/tools/testing/selftests/bpf/get_cgroup_id_user.c index 156743cf58705..aefd83ebdcd7f 100644 --- a/tools/testing/selftests/bpf/get_cgroup_id_user.c +++ b/tools/testing/selftests/bpf/get_cgroup_id_user.c @@ -86,8 +86,13 @@ int main(int argc, char **argv) pid = getpid(); bpf_map_update_elem(pidmap_fd, &key, &pid, 0); - snprintf(buf, sizeof(buf), - "/sys/kernel/debug/tracing/events/%s/id", probe_name); + if (access("/sys/kernel/tracing/trace", F_OK) == 0) { + snprintf(buf, sizeof(buf), + "/sys/kernel/tracing/events/%s/id", probe_name); + } else { + snprintf(buf, sizeof(buf), + "/sys/kernel/debug/tracing/events/%s/id", probe_name); + } efd = open(buf, O_RDONLY, 0); if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno)) goto close_prog; diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index 113dba349a57f..22be0a9a5a0a7 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -338,7 +338,12 @@ static int get_syms(char ***symsp, size_t *cntp, bool kernel) * Filtering out duplicates by using hashmap__add, which won't * add existing entry. 
*/ - f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r"); + + if (access("/sys/kernel/tracing/trace", F_OK) == 0) + f = fopen("/sys/kernel/tracing/available_filter_functions", "r"); + else + f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r"); + if (!f) return -EINVAL; diff --git a/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c b/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c index c717741bf8b65..c91eda6246576 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c +++ b/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c @@ -17,8 +17,13 @@ static void test_task_fd_query_tp_core(const char *probe_name, if (CHECK(err, "bpf_prog_test_load", "err %d errno %d\n", err, errno)) goto close_prog; - snprintf(buf, sizeof(buf), - "/sys/kernel/debug/tracing/events/%s/id", probe_name); + if (access("/sys/kernel/tracing/trace", F_OK) == 0) { + snprintf(buf, sizeof(buf), + "/sys/kernel/tracing/events/%s/id", probe_name); + } else { + snprintf(buf, sizeof(buf), + "/sys/kernel/debug/tracing/events/%s/id", probe_name); + } efd = open(buf, O_RDONLY, 0); if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno)) goto close_prog; diff --git a/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c b/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c index 770fcc3bb1bae..655d69f0ff0bf 100644 --- a/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c +++ b/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c @@ -16,8 +16,13 @@ void serial_test_tp_attach_query(void) for (i = 0; i < num_progs; i++) obj[i] = NULL; - snprintf(buf, sizeof(buf), - "/sys/kernel/debug/tracing/events/sched/sched_switch/id"); + if (access("/sys/kernel/tracing/trace", F_OK) == 0) { + snprintf(buf, sizeof(buf), + "/sys/kernel/tracing/events/sched/sched_switch/id"); + } else { + snprintf(buf, sizeof(buf), + "/sys/kernel/debug/tracing/events/sched/sched_switch/id"); + } efd = open(buf, O_RDONLY, 0); if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno)) return; diff --git a/tools/testing/selftests/bpf/prog_tests/trace_printk.c b/tools/testing/selftests/bpf/prog_tests/trace_printk.c index cade7f12315f7..7b9124d506a53 100644 --- a/tools/testing/selftests/bpf/prog_tests/trace_printk.c +++ b/tools/testing/selftests/bpf/prog_tests/trace_printk.c @@ -5,7 +5,8 @@ #include "trace_printk.lskel.h" -#define TRACEBUF "/sys/kernel/debug/tracing/trace_pipe" +#define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe" +#define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe" #define SEARCHMSG "testing,testing" void serial_test_trace_printk(void) @@ -34,8 +35,11 @@ void serial_test_trace_printk(void) if (!ASSERT_OK(err, "trace_printk__attach")) goto cleanup; - fp = fopen(TRACEBUF, "r"); - if (!ASSERT_OK_PTR(fp, "fopen(TRACEBUF)")) + if (access(TRACEFS_PIPE, F_OK) == 0) + fp = fopen(TRACEFS_PIPE, "r"); + else + fp = fopen(DEBUGFS_PIPE, "r"); + if (!ASSERT_OK_PTR(fp, "fopen(TRACE_PIPE)")) goto cleanup; /* We do not want to wait forever if this test fails... 
*/ diff --git a/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c b/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c index 7a4e313e85584..44ea2fd88f4cf 100644 --- a/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c +++ b/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c @@ -5,7 +5,8 @@ #include "trace_vprintk.lskel.h" -#define TRACEBUF "/sys/kernel/debug/tracing/trace_pipe" +#define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe" +#define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe" #define SEARCHMSG "1,2,3,4,5,6,7,8,9,10" void serial_test_trace_vprintk(void) @@ -27,8 +28,11 @@ void serial_test_trace_vprintk(void) if (!ASSERT_OK(err, "trace_vprintk__attach")) goto cleanup; - fp = fopen(TRACEBUF, "r"); - if (!ASSERT_OK_PTR(fp, "fopen(TRACEBUF)")) + if (access(TRACEFS_PIPE, F_OK) == 0) + fp = fopen(TRACEFS_PIPE, "r"); + else + fp = fopen(DEBUGFS_PIPE, "r"); + if (!ASSERT_OK_PTR(fp, "fopen(TRACE_PIPE)")) goto cleanup; /* We do not want to wait forever if this test fails... */ diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c index 728dbd39eff0b..47568007b6683 100644 --- a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c +++ b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c @@ -38,7 +38,7 @@ struct { __type(value, stack_trace_t); } stack_amap SEC(".maps"); -/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ +/* taken from /sys/kernel/tracing/events/sched/sched_switch/format */ struct sched_switch_args { unsigned long long pad; char prev_comm[TASK_COMM_LEN]; diff --git a/tools/testing/selftests/bpf/progs/test_tracepoint.c b/tools/testing/selftests/bpf/progs/test_tracepoint.c index 43bd7a20cc503..4cb8bbb6a3206 100644 --- a/tools/testing/selftests/bpf/progs/test_tracepoint.c +++ b/tools/testing/selftests/bpf/progs/test_tracepoint.c @@ -4,7 +4,7 @@ #include #include -/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ +/* taken from /sys/kernel/tracing/events/sched/sched_switch/format */ struct sched_switch_args { unsigned long long pad; char prev_comm[TASK_COMM_LEN]; diff --git a/tools/testing/selftests/bpf/test_ftrace.sh b/tools/testing/selftests/bpf/test_ftrace.sh index 20de7bb873bc7..f5109eb0e9513 100755 --- a/tools/testing/selftests/bpf/test_ftrace.sh +++ b/tools/testing/selftests/bpf/test_ftrace.sh @@ -1,6 +1,11 @@ #!/bin/bash -TR=/sys/kernel/debug/tracing/ +if [[ -e /sys/kernel/tracing/trace ]]; then + TR=/sys/kernel/tracing/ +else + TR=/sys/kernel/debug/tracing/ +fi + clear_trace() { # reset trace output echo > $TR/trace } diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh index 06857b689c115..2dec7dbf29a27 100755 --- a/tools/testing/selftests/bpf/test_tunnel.sh +++ b/tools/testing/selftests/bpf/test_tunnel.sh @@ -571,8 +571,13 @@ setup_xfrm_tunnel() test_xfrm_tunnel() { + if [[ -e /sys/kernel/tracing/trace ]]; then + TRACE=/sys/kernel/tracing/trace + else + TRACE=/sys/kernel/debug/tracing/trace + fi config_device - > /sys/kernel/debug/tracing/trace + > ${TRACE} setup_xfrm_tunnel mkdir -p ${BPF_PIN_TUNNEL_DIR} bpftool prog loadall ${BPF_FILE} ${BPF_PIN_TUNNEL_DIR} @@ -581,11 +586,11 @@ test_xfrm_tunnel() ${BPF_PIN_TUNNEL_DIR}/xfrm_get_state ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 sleep 1 - grep "reqid 1" /sys/kernel/debug/tracing/trace + grep "reqid 1" ${TRACE} check_err $? - grep "spi 0x1" /sys/kernel/debug/tracing/trace + grep "spi 0x1" ${TRACE} check_err $? 
- grep "remote ip 0xac100164" /sys/kernel/debug/tracing/trace + grep "remote ip 0xac100164" ${TRACE} check_err $? cleanup diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 09a16a77bae4b..934bf28fc8881 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -12,7 +12,8 @@ #include #include "trace_helpers.h" -#define DEBUGFS "/sys/kernel/debug/tracing/" +#define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe" +#define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe" #define MAX_SYMS 300000 static struct ksym syms[MAX_SYMS]; @@ -136,7 +137,10 @@ void read_trace_pipe(void) { int trace_fd; - trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0); + if (access(TRACEFS_PIPE, F_OK) == 0) + trace_fd = open(TRACEFS_PIPE, O_RDONLY, 0); + else + trace_fd = open(DEBUGFS_PIPE, O_RDONLY, 0); if (trace_fd < 0) return; -- GitLab From b9fe8e8d03d0df28b2431e3aaf8e115cf7bf2f65 Mon Sep 17 00:00:00 2001 From: Dave Thaler Date: Fri, 10 Mar 2023 23:38:14 +0000 Subject: [PATCH 0409/2078] bpf, docs: Add signed comparison example Improve clarity by adding an example of a signed comparison instruction Signed-off-by: Dave Thaler Acked-by: David Vernet Acked-by: John Fastabend Link: https://lore.kernel.org/r/20230310233814.4641-1-dthaler1968@googlemail.com Signed-off-by: Alexei Starovoitov --- Documentation/bpf/instruction-set.rst | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/instruction-set.rst index 5e43e14abe80e..b446405890554 100644 --- a/Documentation/bpf/instruction-set.rst +++ b/Documentation/bpf/instruction-set.rst @@ -11,7 +11,8 @@ Documentation conventions ========================= For brevity, this document uses the type notion "u64", "u32", etc. -to mean an unsigned integer whose width is the specified number of bits. +to mean an unsigned integer whose width is the specified number of bits, +and "s32", etc. to mean a signed integer of the specified number of bits. Registers and calling convention ================================ @@ -264,6 +265,14 @@ BPF_JSLE 0xd0 PC += off if dst <= src signed The eBPF program needs to store the return value into register R0 before doing a BPF_EXIT. +Example: + +``BPF_JSGE | BPF_X | BPF_JMP32`` (0x7e) means:: + + if (s32)dst s>= (s32)src goto +offset + +where 's>=' indicates a signed '>=' comparison. + Helper functions ~~~~~~~~~~~~~~~~ -- GitLab From c9267aa8b794c2188d49c7d7bd2990e98b2d6b84 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 13 Mar 2023 16:58:43 -0700 Subject: [PATCH 0410/2078] bpf: Fix bpf_strncmp proto. bpf_strncmp() doesn't write into its first argument. Make sure that the verifier knows about it. 
Signed-off-by: Alexei Starovoitov Acked-by: David Vernet Link: https://lore.kernel.org/r/20230313235845.61029-2-alexei.starovoitov@gmail.com Signed-off-by: Martin KaFai Lau --- kernel/bpf/helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 77d64b6951b9c..f753676ef6522 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -571,7 +571,7 @@ static const struct bpf_func_proto bpf_strncmp_proto = { .func = bpf_strncmp, .gpl_only = false, .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_MEM, + .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg2_type = ARG_CONST_SIZE, .arg3_type = ARG_PTR_TO_CONST_STR, }; -- GitLab From 3e30be4288b31702d4898487a74e80ba14150a9f Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 13 Mar 2023 16:58:44 -0700 Subject: [PATCH 0411/2078] bpf: Allow helpers access trusted PTR_TO_BTF_ID. The verifier rejects the code: bpf_strncmp(task->comm, 16, "my_task"); with the message: 16: (85) call bpf_strncmp#182 R1 type=trusted_ptr_ expected=fp, pkt, pkt_meta, map_key, map_value, mem, ringbuf_mem, buf Teach the verifier that such access pattern is safe. Do not allow untrusted and legacy ptr_to_btf_id to be passed into helpers. Reported-by: David Vernet Signed-off-by: Alexei Starovoitov Acked-by: David Vernet Link: https://lore.kernel.org/r/20230313235845.61029-3-alexei.starovoitov@gmail.com Signed-off-by: Martin KaFai Lau --- kernel/bpf/verifier.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 883d4ff2e2883..2bbd89279070a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6303,6 +6303,9 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, env, regno, reg->off, access_size, zero_size_allowed, ACCESS_HELPER, meta); + case PTR_TO_BTF_ID: + return check_ptr_to_btf_access(env, regs, regno, reg->off, + access_size, BPF_READ, -1); case PTR_TO_CTX: /* in case the function doesn't know how to access the context, * (because we are in a program of type SYSCALL for example), we @@ -7014,6 +7017,7 @@ static const struct bpf_reg_types mem_types = { PTR_TO_MEM, PTR_TO_MEM | MEM_RINGBUF, PTR_TO_BUF, + PTR_TO_BTF_ID | PTR_TRUSTED, }, }; @@ -7145,6 +7149,17 @@ found: if (base_type(reg->type) != PTR_TO_BTF_ID) return 0; + if (compatible == &mem_types) { + if (!(arg_type & MEM_RDONLY)) { + verbose(env, + "%s() may write into memory pointed by R%d type=%s\n", + func_id_name(meta->func_id), + regno, reg_type_str(env, reg->type)); + return -EACCES; + } + return 0; + } + switch ((int)reg->type) { case PTR_TO_BTF_ID: case PTR_TO_BTF_ID | PTR_TRUSTED: -- GitLab From f25fd6088216bd257902e5c212177cddcb291218 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 13 Mar 2023 16:58:45 -0700 Subject: [PATCH 0412/2078] selftests/bpf: Add various tests to check helper access into ptr_to_btf_id. Add various tests to check helper access into ptr_to_btf_id. 
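A minimal sketch (hypothetical program, mirroring the selftests added below) of the access pattern the verifier now accepts, assuming the usual selftest build environment:

	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_tracing.h>

	char _license[] SEC("license") = "GPL";

	SEC("tp_btf/task_newtask")
	int BPF_PROG(check_comm, struct task_struct *task, u64 clone_flags)
	{
		/* task is a trusted PTR_TO_BTF_ID argument; task->comm is a
		 * fixed-size char array inside that trusted object, so it may
		 * now be passed to a read-only mem helper such as bpf_strncmp().
		 */
		bpf_strncmp(task->comm, 16, "my_task");
		return 0;
	}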
Signed-off-by: Alexei Starovoitov Acked-by: David Vernet Link: https://lore.kernel.org/r/20230313235845.61029-4-alexei.starovoitov@gmail.com Signed-off-by: Martin KaFai Lau --- .../selftests/bpf/progs/task_kfunc_failure.c | 36 +++++++++++++++++++ .../selftests/bpf/progs/task_kfunc_success.c | 4 +++ 2 files changed, 40 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c index 002c7f69e47f5..27994d6b29142 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c +++ b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c @@ -301,3 +301,39 @@ int BPF_PROG(task_kfunc_from_lsm_task_free, struct task_struct *task) bpf_task_release(acquired); return 0; } + +SEC("tp_btf/task_newtask") +__failure __msg("access beyond the end of member comm") +int BPF_PROG(task_access_comm1, struct task_struct *task, u64 clone_flags) +{ + bpf_strncmp(task->comm, 17, "foo"); + return 0; +} + +SEC("tp_btf/task_newtask") +__failure __msg("access beyond the end of member comm") +int BPF_PROG(task_access_comm2, struct task_struct *task, u64 clone_flags) +{ + bpf_strncmp(task->comm + 1, 16, "foo"); + return 0; +} + +SEC("tp_btf/task_newtask") +__failure __msg("write into memory") +int BPF_PROG(task_access_comm3, struct task_struct *task, u64 clone_flags) +{ + bpf_probe_read_kernel(task->comm, 16, task->comm); + return 0; +} + +SEC("fentry/__set_task_comm") +__failure __msg("R1 type=ptr_ expected") +int BPF_PROG(task_access_comm4, struct task_struct *task, const char *buf, bool exec) +{ + /* + * task->comm is a legacy ptr_to_btf_id. The verifier cannot guarantee + * its safety. Hence it cannot be accessed with normal load insns. + */ + bpf_strncmp(task->comm, 16, "foo"); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_success.c b/tools/testing/selftests/bpf/progs/task_kfunc_success.c index aebc4bb14e7dd..4f61596b0242f 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_success.c +++ b/tools/testing/selftests/bpf/progs/task_kfunc_success.c @@ -207,6 +207,10 @@ int BPF_PROG(test_task_from_pid_invalid, struct task_struct *task, u64 clone_fla if (!is_test_kfunc_task()) return 0; + bpf_strncmp(task->comm, 12, "foo"); + bpf_strncmp(task->comm, 16, "foo"); + bpf_strncmp(&task->comm[8], 4, "foo"); + if (is_pid_lookup_valid(-1)) { err = 1; return 0; -- GitLab From 487deb3e3393cccff0f148c4703efb185d46e314 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Mon, 13 Mar 2023 22:55:50 +0100 Subject: [PATCH 0413/2078] selftests/bpf: robustify test_xdp_do_redirect with more payload magics Currently, the test relies on that only dropped ("xmitted") frames will be recycled and if a frame became an skb, it will be freed later by the stack and never come back to its page_pool. So, it easily gets broken by trying to recycle skbs[0]: test_xdp_do_redirect:PASS:pkt_count_xdp 0 nsec test_xdp_do_redirect:FAIL:pkt_count_zero unexpected pkt_count_zero: actual 9936 != expected 2 test_xdp_do_redirect:PASS:pkt_count_tc 0 nsec That huge mismatch happened because after the TC ingress hook zeroes the magic, the page gets recycled when skb is freed, not returned to the MM layer. "Live frames" mode initializes only new pages and keeps the recycled ones as is by design, so they appear with zeroed magic on the Rx path again. 
Expand the possible magic values from two: 0 (was "xmitted"/dropped or did hit the TC hook) and 0x42 (hit the input XDP prog) to three: the new one will mark frames hit the TC hook, so that they will elide both @pkt_count_zero and @pkt_count_xdp. They can then be recycled to their page_pool or returned to the page allocator, this won't affect the counters anyhow. Just make sure to mark them as "input" (0x42) when they appear on the Rx path again. Also make an enum from those magics, so that they will be always visible and can be changed in just one place anytime. This also eases adding any new marks later on. Link: https://github.com/kernel-patches/bpf/actions/runs/4386538411/jobs/7681081789 Signed-off-by: Alexander Lobakin Link: https://lore.kernel.org/r/20230313215553.1045175-2-aleksander.lobakin@intel.com Signed-off-by: Alexei Starovoitov --- .../bpf/progs/test_xdp_do_redirect.c | 36 +++++++++++++------ 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c index 77a123071940a..cd2d4e3258b89 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c @@ -4,6 +4,19 @@ #define ETH_ALEN 6 #define HDR_SZ (sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + sizeof(struct udphdr)) + +/** + * enum frame_mark - magics to distinguish page/packet paths + * @MARK_XMIT: page was recycled due to the frame being "xmitted" by the NIC. + * @MARK_IN: frame is being processed by the input XDP prog. + * @MARK_SKB: frame did hit the TC ingress hook as an skb. + */ +enum frame_mark { + MARK_XMIT = 0U, + MARK_IN = 0x42, + MARK_SKB = 0x45, +}; + const volatile int ifindex_out; const volatile int ifindex_in; const volatile __u8 expect_dst[ETH_ALEN]; @@ -34,10 +47,10 @@ int xdp_redirect(struct xdp_md *xdp) if (*metadata != 0x42) return XDP_ABORTED; - if (*payload == 0) { - *payload = 0x42; + if (*payload == MARK_XMIT) pkts_seen_zero++; - } + + *payload = MARK_IN; if (bpf_xdp_adjust_meta(xdp, 4)) return XDP_ABORTED; @@ -51,7 +64,7 @@ int xdp_redirect(struct xdp_md *xdp) return ret; } -static bool check_pkt(void *data, void *data_end) +static bool check_pkt(void *data, void *data_end, const __u32 mark) { struct ipv6hdr *iph = data + sizeof(struct ethhdr); __u8 *payload = data + HDR_SZ; @@ -59,13 +72,13 @@ static bool check_pkt(void *data, void *data_end) if (payload + 1 > data_end) return false; - if (iph->nexthdr != IPPROTO_UDP || *payload != 0x42) + if (iph->nexthdr != IPPROTO_UDP || *payload != MARK_IN) return false; /* reset the payload so the same packet doesn't get counted twice when * it cycles back through the kernel path and out the dst veth */ - *payload = 0; + *payload = mark; return true; } @@ -75,11 +88,11 @@ int xdp_count_pkts(struct xdp_md *xdp) void *data = (void *)(long)xdp->data; void *data_end = (void *)(long)xdp->data_end; - if (check_pkt(data, data_end)) + if (check_pkt(data, data_end, MARK_XMIT)) pkts_seen_xdp++; - /* Return XDP_DROP to make sure the data page is recycled, like when it - * exits a physical NIC. Recycled pages will be counted in the + /* Return %XDP_DROP to recycle the data page with %MARK_XMIT, like + * it exited a physical NIC. Those pages will be counted in the * pkts_seen_zero counter above. 
*/ return XDP_DROP; @@ -91,9 +104,12 @@ int tc_count_pkts(struct __sk_buff *skb) void *data = (void *)(long)skb->data; void *data_end = (void *)(long)skb->data_end; - if (check_pkt(data, data_end)) + if (check_pkt(data, data_end, MARK_SKB)) pkts_seen_tc++; + /* Will be either recycled or freed, %MARK_SKB makes sure it won't + * hit any of the counters above. + */ return 0; } -- GitLab From 2c854e5fcd7e243f5a7cf6a6afa0ef83060c903c Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Mon, 13 Mar 2023 22:55:51 +0100 Subject: [PATCH 0414/2078] net: page_pool, skbuff: make skb_mark_for_recycle() always available skb_mark_for_recycle() is guarded with CONFIG_PAGE_POOL, this creates unneeded complication when using it in the generic code. For now, it's only used in the drivers always selecting Page Pool, so this works. Move the guards so that preprocessor will cut out only the operation itself and the function will still be a noop on !PAGE_POOL systems, but available there as well. No functional changes. Reported-by: kernel test robot Link: https://lore.kernel.org/oe-kbuild-all/202303020342.Wi2PRFFH-lkp@intel.com Signed-off-by: Alexander Lobakin Link: https://lore.kernel.org/r/20230313215553.1045175-3-aleksander.lobakin@intel.com Signed-off-by: Alexei Starovoitov --- include/linux/skbuff.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index fe661011644b8..3f3a2a82a86b3 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -5069,12 +5069,12 @@ static inline u64 skb_get_kcov_handle(struct sk_buff *skb) #endif } -#ifdef CONFIG_PAGE_POOL static inline void skb_mark_for_recycle(struct sk_buff *skb) { +#ifdef CONFIG_PAGE_POOL skb->pp_recycle = 1; -} #endif +} #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ -- GitLab From 9c94bbf9a87b264294f42e6cc0f76d87854733ec Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Mon, 13 Mar 2023 22:55:52 +0100 Subject: [PATCH 0415/2078] xdp: recycle Page Pool backed skbs built from XDP frames __xdp_build_skb_from_frame() state(d): /* Until page_pool get SKB return path, release DMA here */ Page Pool got skb pages recycling in April 2021, but missed this function. xdp_release_frame() is relevant only for Page Pool backed frames and it detaches the page from the corresponding page_pool in order to make it freeable via page_frag_free(). It can instead just mark the output skb as eligible for recycling if the frame is backed by a pp. No change for other memory model types (the same condition check as before). cpumap redirect and veth on Page Pool drivers now become zero-alloc (or almost). 
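A hedged sketch of how a page_pool based driver opts its Rx skbs into this recycling path (names here are illustrative, not taken from a specific driver):

	/* The data buffer comes from a page_pool; marking the skb makes the
	 * stack return its pages to that pool when the skb is freed, rather
	 * than to the page allocator.
	 */
	static struct sk_buff *rx_build_skb(struct page *page, unsigned int truesize)
	{
		struct sk_buff *skb = build_skb(page_address(page), truesize);

		if (skb)
			skb_mark_for_recycle(skb);
		return skb;
	}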
Signed-off-by: Alexander Lobakin Link: https://lore.kernel.org/r/20230313215553.1045175-4-aleksander.lobakin@intel.com Signed-off-by: Alexei Starovoitov --- net/core/xdp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/xdp.c b/net/core/xdp.c index 8c92fc5533177..a2237cfca8e9f 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -658,8 +658,8 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf, * - RX ring dev queue index (skb_record_rx_queue) */ - /* Until page_pool get SKB return path, release DMA here */ - xdp_release_frame(xdpf); + if (xdpf->mem.type == MEM_TYPE_PAGE_POOL) + skb_mark_for_recycle(skb); /* Allow SKB to reuse area used by xdp_frame */ xdp_scrub_frame(xdpf); -- GitLab From d4e492338d11937c55841b1279287280d6e35894 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Mon, 13 Mar 2023 22:55:53 +0100 Subject: [PATCH 0416/2078] xdp: remove unused {__,}xdp_release_frame() __xdp_build_skb_from_frame() was the last user of {__,}xdp_release_frame(), which detaches pages from the page_pool. All the consumers now recycle Page Pool skbs and page, except mlx5, stmmac and tsnep drivers, which use page_pool_release_page() directly (might change one day). It's safe to assume this functionality is not needed anymore and can be removed (in favor of recycling). Signed-off-by: Alexander Lobakin Link: https://lore.kernel.org/r/20230313215553.1045175-5-aleksander.lobakin@intel.com Signed-off-by: Alexei Starovoitov --- include/net/xdp.h | 29 ----------------------------- net/core/xdp.c | 15 --------------- 2 files changed, 44 deletions(-) diff --git a/include/net/xdp.h b/include/net/xdp.h index d517bfac937b0..5393b3ebe56e5 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -317,35 +317,6 @@ void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq); void xdp_return_frame_bulk(struct xdp_frame *xdpf, struct xdp_frame_bulk *bq); -/* When sending xdp_frame into the network stack, then there is no - * return point callback, which is needed to release e.g. DMA-mapping - * resources with page_pool. Thus, have explicit function to release - * frame resources. 
- */ -void __xdp_release_frame(void *data, struct xdp_mem_info *mem); -static inline void xdp_release_frame(struct xdp_frame *xdpf) -{ - struct xdp_mem_info *mem = &xdpf->mem; - struct skb_shared_info *sinfo; - int i; - - /* Curr only page_pool needs this */ - if (mem->type != MEM_TYPE_PAGE_POOL) - return; - - if (likely(!xdp_frame_has_frags(xdpf))) - goto out; - - sinfo = xdp_get_shared_info_from_frame(xdpf); - for (i = 0; i < sinfo->nr_frags; i++) { - struct page *page = skb_frag_page(&sinfo->frags[i]); - - __xdp_release_frame(page_address(page), mem); - } -out: - __xdp_release_frame(xdpf->data, mem); -} - static __always_inline unsigned int xdp_get_frame_len(struct xdp_frame *xdpf) { struct skb_shared_info *sinfo; diff --git a/net/core/xdp.c b/net/core/xdp.c index a2237cfca8e9f..8d3ad315f18df 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -531,21 +531,6 @@ out: } EXPORT_SYMBOL_GPL(xdp_return_buff); -/* Only called for MEM_TYPE_PAGE_POOL see xdp.h */ -void __xdp_release_frame(void *data, struct xdp_mem_info *mem) -{ - struct xdp_mem_allocator *xa; - struct page *page; - - rcu_read_lock(); - xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); - page = virt_to_head_page(data); - if (xa) - page_pool_release_page(xa->page_pool, page); - rcu_read_unlock(); -} -EXPORT_SYMBOL_GPL(__xdp_release_frame); - void xdp_attachment_setup(struct xdp_attachment_info *info, struct netdev_bpf *bpf) { -- GitLab From 3c2611bac08a834697be918ac357eaff2e47d5b3 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 14 Mar 2023 15:28:11 -0700 Subject: [PATCH 0417/2078] selftests/bpf: Fix trace_virtqueue_add_sgs test issue with LLVM 17. LLVM commit https://reviews.llvm.org/D143726 introduced hoistMinMax optimization that transformed (i < VIRTIO_MAX_SGS) && (i < out_sgs) into i < MIN(VIRTIO_MAX_SGS, out_sgs) and caused the verifier to stop recognizing such loop as bounded. Which resulted in the following test failure: libbpf: prog 'trace_virtqueue_add_sgs': BPF program load failed: Bad address libbpf: prog 'trace_virtqueue_add_sgs': -- BEGIN PROG LOAD LOG -- The sequence of 8193 jumps is too complex. verification time 789206 usec stack depth 56 processed 156446 insns (limit 1000000) max_states_per_insn 7 total_states 1746 peak_states 1701 mark_read 12 -- END PROG LOAD LOG -- libbpf: prog 'trace_virtqueue_add_sgs': failed to load: -14 libbpf: failed to load object 'loop6.bpf.o' Workaround the verifier limitation for now with inline asm that prevents this particular optimization. 
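For reference, __sink() in the selftests' bpf_misc.h is essentially an empty inline-asm statement that keeps the variable live and opaque to the optimizer; roughly (paraphrased, see that header for the exact definition):

	#define __sink(expr)	asm volatile("" : "+g"(expr))

With out_sgs (or in_sgs) fed through such a barrier inside the loop, the compiler can no longer fold the two bound checks into a single i < MIN(VIRTIO_MAX_SGS, out_sgs) comparison, and the verifier keeps seeing a bounded loop.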
Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/loop6.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/loop6.c b/tools/testing/selftests/bpf/progs/loop6.c index 38de0331e6b46..e4ff97fbcce18 100644 --- a/tools/testing/selftests/bpf/progs/loop6.c +++ b/tools/testing/selftests/bpf/progs/loop6.c @@ -5,6 +5,7 @@ #include #include #include +#include "bpf_misc.h" char _license[] SEC("license") = "GPL"; @@ -76,6 +77,7 @@ int BPF_KPROBE(trace_virtqueue_add_sgs, void *unused, struct scatterlist **sgs, return 0; for (i = 0; (i < VIRTIO_MAX_SGS) && (i < out_sgs); i++) { + __sink(out_sgs); for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX); sgp = __sg_next(sgp)) { bpf_probe_read_kernel(&len, sizeof(len), &sgp->length); @@ -85,6 +87,7 @@ int BPF_KPROBE(trace_virtqueue_add_sgs, void *unused, struct scatterlist **sgs, } for (i = 0; (i < VIRTIO_MAX_SGS) && (i < in_sgs); i++) { + __sink(in_sgs); for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX); sgp = __sg_next(sgp)) { bpf_probe_read_kernel(&len, sizeof(len), &sgp->length); -- GitLab From b8a2e3f93d412114a1539ea97b59b3e6ed6e1f9a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 14 Mar 2023 11:59:49 -1000 Subject: [PATCH 0418/2078] cgroup: Make current_cgns_cgroup_dfl() safe to call after exit_task_namespace() The commit 332ea1f697be ("bpf: Add bpf_cgroup_from_id() kfunc") added bpf_cgroup_from_id() which calls current_cgns_cgroup_dfl() through cgroup_get_from_id(). However, BPF programs may be attached to a point where current->nsproxy has already been cleared to NULL by exit_task_namespace() and calling bpf_cgroup_from_id() would cause an oops. Just return the system-wide root if nsproxy has been cleared. This allows all cgroups to be looked up after the task passed through exit_task_namespace(), which semantically makes sense. Given that the only way to get this behavior is through BPF programs, it seems safe but let's see what others think. Fixes: 332ea1f697be ("bpf: Add bpf_cgroup_from_id() kfunc") Signed-off-by: Tejun Heo Link: https://lore.kernel.org/r/ZBDuVWiFj2jiz3i8@slm.duckdns.org Signed-off-by: Alexei Starovoitov --- kernel/cgroup/cgroup.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 935e8121b21e6..8a5294f4ce720 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1465,8 +1465,18 @@ static struct cgroup *current_cgns_cgroup_dfl(void) { struct css_set *cset; - cset = current->nsproxy->cgroup_ns->root_cset; - return __cset_cgroup_from_root(cset, &cgrp_dfl_root); + if (current->nsproxy) { + cset = current->nsproxy->cgroup_ns->root_cset; + return __cset_cgroup_from_root(cset, &cgrp_dfl_root); + } else { + /* + * NOTE: This function may be called from bpf_cgroup_from_id() + * on a task which has already passed exit_task_namespaces() and + * nsproxy == NULL. Fall back to cgrp_dfl_root which will make all + * cgroups visible for lookups. + */ + return &cgrp_dfl_root.cgrp; + } } /* look up cgroup associated with given css_set on the specified hierarchy */ -- GitLab From 68a84a127bb07d2a47e95c808520f742c54781c7 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Sun, 12 Mar 2023 20:51:55 +0100 Subject: [PATCH 0419/2078] net: lan966x: Change lan966x_police_del return type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As the function always returns 0 change the return type to be void instead of int. 
In this way also remove a wrong message in case of error which would never happen. Signed-off-by: Horatiu Vultur Reviewed-by: Alvin Šipraga Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230312195155.1492881-1-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/microchip/lan966x/lan966x_police.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_police.c b/drivers/net/ethernet/microchip/lan966x/lan966x_police.c index 7d66fe75cd3bf..7302df2300fd2 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_police.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_police.c @@ -49,8 +49,7 @@ static int lan966x_police_add(struct lan966x_port *port, return 0; } -static int lan966x_police_del(struct lan966x_port *port, - u16 pol_idx) +static void lan966x_police_del(struct lan966x_port *port, u16 pol_idx) { struct lan966x *lan966x = port->lan966x; @@ -67,8 +66,6 @@ static int lan966x_police_del(struct lan966x_port *port, lan_wr(ANA_POL_PIR_CFG_PIR_RATE_SET(GENMASK(14, 0)) | ANA_POL_PIR_CFG_PIR_BURST_SET(0), lan966x, ANA_POL_PIR_CFG(pol_idx)); - - return 0; } static int lan966x_police_validate(struct lan966x_port *port, @@ -186,7 +183,6 @@ int lan966x_police_port_del(struct lan966x_port *port, struct netlink_ext_ack *extack) { struct lan966x *lan966x = port->lan966x; - int err; if (port->tc.police_id != police_id) { NL_SET_ERR_MSG_MOD(extack, @@ -194,12 +190,7 @@ int lan966x_police_port_del(struct lan966x_port *port, return -EINVAL; } - err = lan966x_police_del(port, POL_IDX_PORT + port->chip_port); - if (err) { - NL_SET_ERR_MSG_MOD(extack, - "Failed to add policer to port"); - return err; - } + lan966x_police_del(port, POL_IDX_PORT + port->chip_port); lan_rmw(ANA_POL_CFG_PORT_POL_ENA_SET(0) | ANA_POL_CFG_POL_ORDER_SET(POL_ORDER), -- GitLab From b071af523579df7341cabf0f16fc661125e9a13f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 Mar 2023 20:17:31 +0000 Subject: [PATCH 0420/2078] neighbour: annotate lockless accesses to n->nud_state We have many lockless accesses to n->nud_state. Before adding another one in the following patch, add annotations to readers and writers. 
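In short, the annotation pattern applied throughout the diff below is (sketch):

	/* Writer side, still serialized by the neighbour lock: publish the
	 * new state with WRITE_ONCE() so the store cannot be torn or fused.
	 */
	WRITE_ONCE(neigh->nud_state, NUD_REACHABLE);

	/* Lockless reader: take one READ_ONCE() snapshot and test that,
	 * instead of re-reading the plain field.
	 */
	if (READ_ONCE(neigh->nud_state) & NUD_VALID) {
		/* the cached link-layer address can be used */
	}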
Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Martin KaFai Lau Signed-off-by: Jakub Kicinski --- drivers/net/vxlan/vxlan_core.c | 4 ++-- include/net/neighbour.h | 2 +- net/bridge/br_arp_nd_proxy.c | 4 ++-- net/bridge/br_netfilter_hooks.c | 3 ++- net/core/filter.c | 4 ++-- net/core/neighbour.c | 28 ++++++++++++++-------------- net/ipv4/arp.c | 8 ++++---- net/ipv4/fib_semantics.c | 4 ++-- net/ipv4/nexthop.c | 4 ++-- net/ipv4/route.c | 2 +- net/ipv6/ip6_output.c | 2 +- net/ipv6/ndisc.c | 4 ++-- net/ipv6/route.c | 2 +- 13 files changed, 36 insertions(+), 35 deletions(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index b1b179effe2a1..f2c30214cae8e 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -1863,7 +1863,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) struct vxlan_fdb *f; struct sk_buff *reply; - if (!(n->nud_state & NUD_CONNECTED)) { + if (!(READ_ONCE(n->nud_state) & NUD_CONNECTED)) { neigh_release(n); goto out; } @@ -2027,7 +2027,7 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) struct vxlan_fdb *f; struct sk_buff *reply; - if (!(n->nud_state & NUD_CONNECTED)) { + if (!(READ_ONCE(n->nud_state) & NUD_CONNECTED)) { neigh_release(n); goto out; } diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 234799ca527e0..c8d39bba2a0de 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -464,7 +464,7 @@ static __always_inline int neigh_event_send_probe(struct neighbour *neigh, if (READ_ONCE(neigh->used) != now) WRITE_ONCE(neigh->used, now); - if (!(neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))) + if (!(READ_ONCE(neigh->nud_state) & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))) return __neigh_event_send(neigh, skb, immediate_ok); return 0; } diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c index e5e48c6e35d78..b45c00c01dea1 100644 --- a/net/bridge/br_arp_nd_proxy.c +++ b/net/bridge/br_arp_nd_proxy.c @@ -192,7 +192,7 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br, if (n) { struct net_bridge_fdb_entry *f; - if (!(n->nud_state & NUD_VALID)) { + if (!(READ_ONCE(n->nud_state) & NUD_VALID)) { neigh_release(n); return; } @@ -452,7 +452,7 @@ void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br, if (n) { struct net_bridge_fdb_entry *f; - if (!(n->nud_state & NUD_VALID)) { + if (!(READ_ONCE(n->nud_state) & NUD_VALID)) { neigh_release(n); return; } diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 638a4d5359db0..3e3065bc0465c 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -277,7 +277,8 @@ int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); int ret; - if ((neigh->nud_state & NUD_CONNECTED) && neigh->hh.hh_len) { + if ((READ_ONCE(neigh->nud_state) & NUD_CONNECTED) && + READ_ONCE(neigh->hh.hh_len)) { neigh_hh_bridge(&neigh->hh, skb); skb->dev = nf_bridge->physindev; ret = br_handle_frame_finish(net, sk, skb); diff --git a/net/core/filter.c b/net/core/filter.c index 50f649f1b4a90..d052fac28d02e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5871,7 +5871,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, else neigh = __ipv6_neigh_lookup_noref_stub(dev, params->ipv6_dst); - if (!neigh || !(neigh->nud_state & NUD_VALID)) + if (!neigh || 
!(READ_ONCE(neigh->nud_state) & NUD_VALID)) return BPF_FIB_LKUP_RET_NO_NEIGH; memcpy(params->dmac, neigh->ha, ETH_ALEN); memcpy(params->smac, dev->dev_addr, ETH_ALEN); @@ -5992,7 +5992,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, * not needed here. */ neigh = __ipv6_neigh_lookup_noref_stub(dev, dst); - if (!neigh || !(neigh->nud_state & NUD_VALID)) + if (!neigh || !(READ_ONCE(neigh->nud_state) & NUD_VALID)) return BPF_FIB_LKUP_RET_NO_NEIGH; memcpy(params->dmac, neigh->ha, ETH_ALEN); memcpy(params->smac, dev->dev_addr, ETH_ALEN); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 0116b0ff91a78..90d399b3f9800 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1093,13 +1093,13 @@ static void neigh_timer_handler(struct timer_list *t) neigh->used + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) { neigh_dbg(2, "neigh %p is delayed\n", neigh); - neigh->nud_state = NUD_DELAY; + WRITE_ONCE(neigh->nud_state, NUD_DELAY); neigh->updated = jiffies; neigh_suspect(neigh); next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME); } else { neigh_dbg(2, "neigh %p is suspected\n", neigh); - neigh->nud_state = NUD_STALE; + WRITE_ONCE(neigh->nud_state, NUD_STALE); neigh->updated = jiffies; neigh_suspect(neigh); notify = 1; @@ -1109,14 +1109,14 @@ static void neigh_timer_handler(struct timer_list *t) neigh->confirmed + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) { neigh_dbg(2, "neigh %p is now reachable\n", neigh); - neigh->nud_state = NUD_REACHABLE; + WRITE_ONCE(neigh->nud_state, NUD_REACHABLE); neigh->updated = jiffies; neigh_connect(neigh); notify = 1; next = neigh->confirmed + neigh->parms->reachable_time; } else { neigh_dbg(2, "neigh %p is probed\n", neigh); - neigh->nud_state = NUD_PROBE; + WRITE_ONCE(neigh->nud_state, NUD_PROBE); neigh->updated = jiffies; atomic_set(&neigh->probes, 0); notify = 1; @@ -1130,7 +1130,7 @@ static void neigh_timer_handler(struct timer_list *t) if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) && atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) { - neigh->nud_state = NUD_FAILED; + WRITE_ONCE(neigh->nud_state, NUD_FAILED); notify = 1; neigh_invalidate(neigh); goto out; @@ -1179,7 +1179,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb, atomic_set(&neigh->probes, NEIGH_VAR(neigh->parms, UCAST_PROBES)); neigh_del_timer(neigh); - neigh->nud_state = NUD_INCOMPLETE; + WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE); neigh->updated = now; if (!immediate_ok) { next = now + 1; @@ -1191,7 +1191,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb, } neigh_add_timer(neigh, next); } else { - neigh->nud_state = NUD_FAILED; + WRITE_ONCE(neigh->nud_state, NUD_FAILED); neigh->updated = jiffies; write_unlock_bh(&neigh->lock); @@ -1201,7 +1201,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb, } else if (neigh->nud_state & NUD_STALE) { neigh_dbg(2, "neigh %p is delayed\n", neigh); neigh_del_timer(neigh); - neigh->nud_state = NUD_DELAY; + WRITE_ONCE(neigh->nud_state, NUD_DELAY); neigh->updated = jiffies; neigh_add_timer(neigh, jiffies + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME)); @@ -1313,7 +1313,7 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, neigh_update_flags(neigh, flags, ¬ify, &gc_update, &managed_update); if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) { new = old & ~NUD_PERMANENT; - neigh->nud_state = new; + WRITE_ONCE(neigh->nud_state, new); err = 0; goto out; } @@ -1322,7 +1322,7 @@ static int __neigh_update(struct 
neighbour *neigh, const u8 *lladdr, neigh_del_timer(neigh); if (old & NUD_CONNECTED) neigh_suspect(neigh); - neigh->nud_state = new; + WRITE_ONCE(neigh->nud_state, new); err = 0; notify = old & NUD_VALID; if ((old & (NUD_INCOMPLETE | NUD_PROBE)) && @@ -1401,7 +1401,7 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, ((new & NUD_REACHABLE) ? neigh->parms->reachable_time : 0))); - neigh->nud_state = new; + WRITE_ONCE(neigh->nud_state, new); notify = 1; } @@ -1488,7 +1488,7 @@ void __neigh_set_probe_once(struct neighbour *neigh) neigh->updated = jiffies; if (!(neigh->nud_state & NUD_FAILED)) return; - neigh->nud_state = NUD_INCOMPLETE; + WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE); atomic_set(&neigh->probes, neigh_max_probes(neigh)); neigh_add_timer(neigh, jiffies + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), @@ -3198,7 +3198,7 @@ static struct neighbour *neigh_get_first(struct seq_file *seq) } if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) break; - if (n->nud_state & ~NUD_NOARP) + if (READ_ONCE(n->nud_state) & ~NUD_NOARP) break; next: n = rcu_dereference_bh(n->next); @@ -3240,7 +3240,7 @@ static struct neighbour *neigh_get_next(struct seq_file *seq, if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) break; - if (n->nud_state & ~NUD_NOARP) + if (READ_ONCE(n->nud_state) & ~NUD_NOARP) break; next: n = rcu_dereference_bh(n->next); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 4f7237661afb9..9456f5bb35e5d 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -375,7 +375,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES); if (probes < 0) { - if (!(neigh->nud_state & NUD_VALID)) + if (!(READ_ONCE(neigh->nud_state) & NUD_VALID)) pr_debug("trying to ucast probe in NUD_INVALID\n"); neigh_ha_snapshot(dst_ha, neigh, dev); dst_hw = dst_ha; @@ -1123,7 +1123,7 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev) neigh = neigh_lookup(&arp_tbl, &ip, dev); if (neigh) { - if (!(neigh->nud_state & NUD_NOARP)) { + if (!(READ_ONCE(neigh->nud_state) & NUD_NOARP)) { read_lock_bh(&neigh->lock); memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len); r->arp_flags = arp_state_to_flags(neigh); @@ -1144,12 +1144,12 @@ int arp_invalidate(struct net_device *dev, __be32 ip, bool force) struct neigh_table *tbl = &arp_tbl; if (neigh) { - if ((neigh->nud_state & NUD_VALID) && !force) { + if ((READ_ONCE(neigh->nud_state) & NUD_VALID) && !force) { neigh_release(neigh); return 0; } - if (neigh->nud_state & ~NUD_NOARP) + if (READ_ONCE(neigh->nud_state) & ~NUD_NOARP) err = neigh_update(neigh, NULL, NUD_FAILED, NEIGH_UPDATE_F_OVERRIDE| NEIGH_UPDATE_F_ADMIN, 0); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 3bb890a40ed73..574ff450c4d23 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -563,7 +563,7 @@ static int fib_detect_death(struct fib_info *fi, int order, n = NULL; if (n) { - state = n->nud_state; + state = READ_ONCE(n->nud_state); neigh_release(n); } else { return 0; @@ -2202,7 +2202,7 @@ static bool fib_good_nh(const struct fib_nh *nh) else n = NULL; if (n) - state = n->nud_state; + state = READ_ONCE(n->nud_state); rcu_read_unlock_bh(); } diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index d8ef05347fd98..e28a99f1996b7 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1128,7 +1128,7 @@ static bool ipv6_good_nh(const struct fib6_nh *nh) n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev, &nh->fib_nh_gw6); if (n) - state = n->nud_state; + state = 
READ_ONCE(n->nud_state); rcu_read_unlock_bh(); @@ -1145,7 +1145,7 @@ static bool ipv4_good_nh(const struct fib_nh *nh) n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev, (__force u32)nh->fib_nh_gw4); if (n) - state = n->nud_state; + state = READ_ONCE(n->nud_state); rcu_read_unlock_bh(); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index de6e3515ab4fe..232009d216c4e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -784,7 +784,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow if (!n) n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev); if (!IS_ERR(n)) { - if (!(n->nud_state & NUD_VALID)) { + if (!(READ_ONCE(n->nud_state) & NUD_VALID)) { neigh_event_send(n, NULL); } else { if (fib_lookup(net, fl4, &res, 0) == 0) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 4ce3f9d3bc8ac..e5ed39a3c65f8 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1153,7 +1153,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, rcu_read_lock_bh(); n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr)); - err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0; + err = n && !(READ_ONCE(n->nud_state) & NUD_VALID) ? -EINVAL : 0; rcu_read_unlock_bh(); if (err) { diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index c4be62c99f737..18634ebd20a47 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -745,7 +745,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) saddr = &ipv6_hdr(skb)->saddr; probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES); if (probes < 0) { - if (!(neigh->nud_state & NUD_VALID)) { + if (!(READ_ONCE(neigh->nud_state) & NUD_VALID)) { ND_PRINTK(1, dbg, "%s: trying to ucast probe in NUD_INVALID: %pI6\n", __func__, target); @@ -1090,7 +1090,7 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb) u8 old_flags = neigh->flags; struct net *net = dev_net(dev); - if (neigh->nud_state & NUD_FAILED) + if (READ_ONCE(neigh->nud_state) & NUD_FAILED) goto out; /* diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0fdb03df22870..25c00c6f5131c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -638,7 +638,7 @@ static void rt6_probe(struct fib6_nh *fib6_nh) idev = __in6_dev_get(dev); neigh = __ipv6_neigh_lookup_noref(dev, nh_gw); if (neigh) { - if (neigh->nud_state & NUD_VALID) + if (READ_ONCE(neigh->nud_state) & NUD_VALID) goto out; write_lock(&neigh->lock); -- GitLab From c486640aa710ddd06c13a7f7162126e1552e8842 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 Mar 2023 20:17:32 +0000 Subject: [PATCH 0421/2078] ipv6: remove one read_lock()/read_unlock() pair in rt6_check_neigh() rt6_check_neigh() uses read_lock() to protect n->nud_state reading. This seems overkill and causes false sharing. 
Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Martin KaFai Lau Signed-off-by: Jakub Kicinski --- net/ipv6/route.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 25c00c6f5131c..e829bd8803840 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -687,16 +687,16 @@ static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh) neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev, &fib6_nh->fib_nh_gw6); if (neigh) { - read_lock(&neigh->lock); - if (neigh->nud_state & NUD_VALID) + u8 nud_state = READ_ONCE(neigh->nud_state); + + if (nud_state & NUD_VALID) ret = RT6_NUD_SUCCEED; #ifdef CONFIG_IPV6_ROUTER_PREF - else if (!(neigh->nud_state & NUD_FAILED)) + else if (!(nud_state & NUD_FAILED)) ret = RT6_NUD_SUCCEED; else ret = RT6_NUD_FAIL_PROBE; #endif - read_unlock(&neigh->lock); } else { ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ? RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR; -- GitLab From 69444581d0022b8afced2c90c441b7b4d9b8eba9 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 10 Mar 2023 10:08:08 +0100 Subject: [PATCH 0422/2078] net: dsa: microchip: add ksz_setup_tc_mode() function Add ksz_setup_tc_mode() to make queue scheduling and shaping configuration more visible. Signed-off-by: Oleksij Rempel Acked-by: Arun Ramadoss Signed-off-by: David S. Miller --- drivers/net/dsa/microchip/ksz_common.c | 20 ++++++++++++-------- drivers/net/dsa/microchip/ksz_common.h | 6 ++---- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 729b36eeb2c46..11f20501524f2 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -32,10 +32,6 @@ #include "ksz9477.h" #include "lan937x.h" -#define KSZ_CBS_ENABLE ((MTI_SCHEDULE_STRICT_PRIO << MTI_SCHEDULE_MODE_S) | \ - (MTI_SHAPING_SRP << MTI_SHAPING_S)) -#define KSZ_CBS_DISABLE ((MTI_SCHEDULE_WRR << MTI_SCHEDULE_MODE_S) |\ - (MTI_SHAPING_OFF << MTI_SHAPING_S)) #define MIB_COUNTER_NUM 0x20 struct ksz_stats_raw { @@ -3091,6 +3087,14 @@ static int cinc_cal(s32 idle_slope, s32 send_slope, u32 *bw) return 0; } +static int ksz_setup_tc_mode(struct ksz_device *dev, int port, u8 scheduler, + u8 shaper) +{ + return ksz_pwrite8(dev, port, REG_PORT_MTI_QUEUE_CTRL_0, + FIELD_PREP(MTI_SCHEDULE_MODE_M, scheduler) | + FIELD_PREP(MTI_SHAPING_M, shaper)); +} + static int ksz_setup_tc_cbs(struct dsa_switch *ds, int port, struct tc_cbs_qopt_offload *qopt) { @@ -3110,8 +3114,8 @@ static int ksz_setup_tc_cbs(struct dsa_switch *ds, int port, return ret; if (!qopt->enable) - return ksz_pwrite8(dev, port, REG_PORT_MTI_QUEUE_CTRL_0, - KSZ_CBS_DISABLE); + return ksz_setup_tc_mode(dev, port, MTI_SCHEDULE_WRR, + MTI_SHAPING_OFF); /* High Credit */ ret = ksz_pwrite16(dev, port, REG_PORT_MTI_HI_WATER_MARK, @@ -3136,8 +3140,8 @@ static int ksz_setup_tc_cbs(struct dsa_switch *ds, int port, return ret; } - return ksz_pwrite8(dev, port, REG_PORT_MTI_QUEUE_CTRL_0, - KSZ_CBS_ENABLE); + return ksz_setup_tc_mode(dev, port, MTI_SCHEDULE_STRICT_PRIO, + MTI_SHAPING_SRP); } static int ksz_setup_tc(struct dsa_switch *ds, int port, diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index d2d5761d58e9b..7d87c9a79fb47 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -654,12 +654,10 @@ static inline int is_lan937x(struct ksz_device *dev) #define REG_PORT_MTI_QUEUE_CTRL_0 0x0914 -#define 
MTI_SCHEDULE_MODE_M 0x3 -#define MTI_SCHEDULE_MODE_S 6 +#define MTI_SCHEDULE_MODE_M GENMASK(7, 6) #define MTI_SCHEDULE_STRICT_PRIO 0 #define MTI_SCHEDULE_WRR 2 -#define MTI_SHAPING_M 0x3 -#define MTI_SHAPING_S 4 +#define MTI_SHAPING_M GENMASK(5, 4) #define MTI_SHAPING_OFF 0 #define MTI_SHAPING_SRP 1 #define MTI_SHAPING_TIME_AWARE 2 -- GitLab From c570f861fa059ea653599415a7c8cc1dfaf16763 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 10 Mar 2023 10:08:09 +0100 Subject: [PATCH 0423/2078] net: dsa: microchip: add ETS Qdisc support for KSZ9477 series Add ETS Qdisc support for KSZ9477 of switches. Current implementation is limited to strict priority mode. Tested on KSZ8563R with following configuration: tc qdisc replace dev lan2 root handle 1: ets strict 4 \ priomap 3 3 2 2 1 1 0 0 ip link add link lan2 name v1 type vlan id 1 \ egress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 and patched iperf3 version: https://github.com/esnet/iperf/pull/1476 iperf3 -c 172.17.0.1 -b100M -l1472 -t100 -u -R --sock-prio 2 Signed-off-by: Oleksij Rempel Acked-by: Arun Ramadoss Signed-off-by: David S. Miller --- drivers/net/dsa/microchip/ksz_common.c | 218 +++++++++++++++++++++++++ drivers/net/dsa/microchip/ksz_common.h | 12 ++ 2 files changed, 230 insertions(+) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 11f20501524f2..5a7ce2aede68b 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -1085,6 +1085,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { .port_nirqs = 3, .num_tx_queues = 4, .tc_cbs_supported = true, + .tc_ets_supported = true, .ops = &ksz9477_dev_ops, .mib_names = ksz9477_mib_names, .mib_cnt = ARRAY_SIZE(ksz9477_mib_names), @@ -1224,6 +1225,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { .port_nirqs = 4, .num_tx_queues = 4, .tc_cbs_supported = true, + .tc_ets_supported = true, .ops = &ksz9477_dev_ops, .phy_errata_9477 = true, .mib_names = ksz9477_mib_names, @@ -1348,6 +1350,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { .port_nirqs = 3, .num_tx_queues = 4, .tc_cbs_supported = true, + .tc_ets_supported = true, .ops = &ksz9477_dev_ops, .mib_names = ksz9477_mib_names, .mib_cnt = ARRAY_SIZE(ksz9477_mib_names), @@ -1375,6 +1378,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { .port_nirqs = 3, .num_tx_queues = 4, .tc_cbs_supported = true, + .tc_ets_supported = true, .ops = &ksz9477_dev_ops, .phy_errata_9477 = true, .mib_names = ksz9477_mib_names, @@ -1407,6 +1411,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { .port_nirqs = 6, .num_tx_queues = 8, .tc_cbs_supported = true, + .tc_ets_supported = true, .ops = &lan937x_dev_ops, .mib_names = ksz9477_mib_names, .mib_cnt = ARRAY_SIZE(ksz9477_mib_names), @@ -1433,6 +1438,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { .port_nirqs = 6, .num_tx_queues = 8, .tc_cbs_supported = true, + .tc_ets_supported = true, .ops = &lan937x_dev_ops, .mib_names = ksz9477_mib_names, .mib_cnt = ARRAY_SIZE(ksz9477_mib_names), @@ -1459,6 +1465,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { .port_nirqs = 6, .num_tx_queues = 8, .tc_cbs_supported = true, + .tc_ets_supported = true, .ops = &lan937x_dev_ops, .mib_names = ksz9477_mib_names, .mib_cnt = ARRAY_SIZE(ksz9477_mib_names), @@ -1489,6 +1496,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { .port_nirqs = 6, .num_tx_queues = 8, .tc_cbs_supported = true, + .tc_ets_supported = true, .ops = &lan937x_dev_ops, .mib_names = ksz9477_mib_names, .mib_cnt = ARRAY_SIZE(ksz9477_mib_names), @@ 
-1519,6 +1527,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { .port_nirqs = 6, .num_tx_queues = 8, .tc_cbs_supported = true, + .tc_ets_supported = true, .ops = &lan937x_dev_ops, .mib_names = ksz9477_mib_names, .mib_cnt = ARRAY_SIZE(ksz9477_mib_names), @@ -3144,12 +3153,221 @@ static int ksz_setup_tc_cbs(struct dsa_switch *ds, int port, MTI_SHAPING_SRP); } +static int ksz_disable_egress_rate_limit(struct ksz_device *dev, int port) +{ + int queue, ret; + + /* Configuration will not take effect until the last Port Queue X + * Egress Limit Control Register is written. + */ + for (queue = 0; queue < dev->info->num_tx_queues; queue++) { + ret = ksz_pwrite8(dev, port, KSZ9477_REG_PORT_OUT_RATE_0 + queue, + KSZ9477_OUT_RATE_NO_LIMIT); + if (ret) + return ret; + } + + return 0; +} + +static int ksz_ets_band_to_queue(struct tc_ets_qopt_offload_replace_params *p, + int band) +{ + /* Compared to queues, bands prioritize packets differently. In strict + * priority mode, the lowest priority is assigned to Queue 0 while the + * highest priority is given to Band 0. + */ + return p->bands - 1 - band; +} + +static int ksz_queue_set_strict(struct ksz_device *dev, int port, int queue) +{ + int ret; + + ret = ksz_pwrite32(dev, port, REG_PORT_MTI_QUEUE_INDEX__4, queue); + if (ret) + return ret; + + return ksz_setup_tc_mode(dev, port, MTI_SCHEDULE_STRICT_PRIO, + MTI_SHAPING_OFF); +} + +static int ksz_queue_set_wrr(struct ksz_device *dev, int port, int queue, + int weight) +{ + int ret; + + ret = ksz_pwrite32(dev, port, REG_PORT_MTI_QUEUE_INDEX__4, queue); + if (ret) + return ret; + + ret = ksz_setup_tc_mode(dev, port, MTI_SCHEDULE_WRR, + MTI_SHAPING_OFF); + if (ret) + return ret; + + return ksz_pwrite8(dev, port, KSZ9477_PORT_MTI_QUEUE_CTRL_1, weight); +} + +static int ksz_tc_ets_add(struct ksz_device *dev, int port, + struct tc_ets_qopt_offload_replace_params *p) +{ + int ret, band, tc_prio; + u32 queue_map = 0; + + /* In order to ensure proper prioritization, it is necessary to set the + * rate limit for the related queue to zero. Otherwise strict priority + * or WRR mode will not work. This is a hardware limitation. + */ + ret = ksz_disable_egress_rate_limit(dev, port); + if (ret) + return ret; + + /* Configure queue scheduling mode for all bands. Currently only strict + * prio mode is supported. + */ + for (band = 0; band < p->bands; band++) { + int queue = ksz_ets_band_to_queue(p, band); + + ret = ksz_queue_set_strict(dev, port, queue); + if (ret) + return ret; + } + + /* Configure the mapping between traffic classes and queues. Note: + * priomap variable support 16 traffic classes, but the chip can handle + * only 8 classes. + */ + for (tc_prio = 0; tc_prio < ARRAY_SIZE(p->priomap); tc_prio++) { + int queue; + + if (tc_prio > KSZ9477_MAX_TC_PRIO) + break; + + queue = ksz_ets_band_to_queue(p, p->priomap[tc_prio]); + queue_map |= queue << (tc_prio * KSZ9477_PORT_TC_MAP_S); + } + + return ksz_pwrite32(dev, port, KSZ9477_PORT_MRI_TC_MAP__4, queue_map); +} + +static int ksz_tc_ets_del(struct ksz_device *dev, int port) +{ + int ret, queue, tc_prio, s; + u32 queue_map = 0; + + /* To restore the default chip configuration, set all queues to use the + * WRR scheduler with a weight of 1. 
+ */ + for (queue = 0; queue < dev->info->num_tx_queues; queue++) { + ret = ksz_queue_set_wrr(dev, port, queue, + KSZ9477_DEFAULT_WRR_WEIGHT); + if (ret) + return ret; + } + + switch (dev->info->num_tx_queues) { + case 2: + s = 2; + break; + case 4: + s = 1; + break; + case 8: + s = 0; + break; + default: + return -EINVAL; + } + + /* Revert the queue mapping for TC-priority to its default setting on + * the chip. + */ + for (tc_prio = 0; tc_prio <= KSZ9477_MAX_TC_PRIO; tc_prio++) { + int queue; + + queue = tc_prio >> s; + queue_map |= queue << (tc_prio * KSZ9477_PORT_TC_MAP_S); + } + + return ksz_pwrite32(dev, port, KSZ9477_PORT_MRI_TC_MAP__4, queue_map); +} + +static int ksz_tc_ets_validate(struct ksz_device *dev, int port, + struct tc_ets_qopt_offload_replace_params *p) +{ + int band; + + /* Since it is not feasible to share one port among multiple qdisc, + * the user must configure all available queues appropriately. + */ + if (p->bands != dev->info->num_tx_queues) { + dev_err(dev->dev, "Not supported amount of bands. It should be %d\n", + dev->info->num_tx_queues); + return -EOPNOTSUPP; + } + + for (band = 0; band < p->bands; ++band) { + /* The KSZ switches utilize a weighted round robin configuration + * where a certain number of packets can be transmitted from a + * queue before the next queue is serviced. For more information + * on this, refer to section 5.2.8.4 of the KSZ8565R + * documentation on the Port Transmit Queue Control 1 Register. + * However, the current ETS Qdisc implementation (as of February + * 2023) assigns a weight to each queue based on the number of + * bytes or extrapolated bandwidth in percentages. Since this + * differs from the KSZ switches' method and we don't want to + * fake support by converting bytes to packets, it is better to + * return an error instead. 
+ */ + if (p->quanta[band]) { + dev_err(dev->dev, "Quanta/weights configuration is not supported.\n"); + return -EOPNOTSUPP; + } + } + + return 0; +} + +static int ksz_tc_setup_qdisc_ets(struct dsa_switch *ds, int port, + struct tc_ets_qopt_offload *qopt) +{ + struct ksz_device *dev = ds->priv; + int ret; + + if (!dev->info->tc_ets_supported) + return -EOPNOTSUPP; + + if (qopt->parent != TC_H_ROOT) { + dev_err(dev->dev, "Parent should be \"root\"\n"); + return -EOPNOTSUPP; + } + + switch (qopt->command) { + case TC_ETS_REPLACE: + ret = ksz_tc_ets_validate(dev, port, &qopt->replace_params); + if (ret) + return ret; + + return ksz_tc_ets_add(dev, port, &qopt->replace_params); + case TC_ETS_DESTROY: + return ksz_tc_ets_del(dev, port); + case TC_ETS_STATS: + case TC_ETS_GRAFT: + return -EOPNOTSUPP; + } + + return -EOPNOTSUPP; +} + static int ksz_setup_tc(struct dsa_switch *ds, int port, enum tc_setup_type type, void *type_data) { switch (type) { case TC_SETUP_QDISC_CBS: return ksz_setup_tc_cbs(ds, port, type_data); + case TC_SETUP_QDISC_ETS: + return ksz_tc_setup_qdisc_ets(ds, port, type_data); default: return -EOPNOTSUPP; } diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index 7d87c9a79fb47..8abecaf6089ef 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -51,6 +51,7 @@ struct ksz_chip_data { u8 port_nirqs; u8 num_tx_queues; bool tc_cbs_supported; + bool tc_ets_supported; const struct ksz_dev_ops *ops; bool phy_errata_9477; bool ksz87xx_eee_link_erratum; @@ -649,6 +650,14 @@ static inline int is_lan937x(struct ksz_device *dev) #define KSZ8_LEGAL_PACKET_SIZE 1518 #define KSZ9477_MAX_FRAME_SIZE 9000 +#define KSZ9477_REG_PORT_OUT_RATE_0 0x0420 +#define KSZ9477_OUT_RATE_NO_LIMIT 0 + +#define KSZ9477_PORT_MRI_TC_MAP__4 0x0808 + +#define KSZ9477_PORT_TC_MAP_S 4 +#define KSZ9477_MAX_TC_PRIO 7 + /* CBS related registers */ #define REG_PORT_MTI_QUEUE_INDEX__4 0x0900 @@ -662,6 +671,9 @@ static inline int is_lan937x(struct ksz_device *dev) #define MTI_SHAPING_SRP 1 #define MTI_SHAPING_TIME_AWARE 2 +#define KSZ9477_PORT_MTI_QUEUE_CTRL_1 0x0915 +#define KSZ9477_DEFAULT_WRR_WEIGHT 1 + #define REG_PORT_MTI_HI_WATER_MARK 0x0916 #define REG_PORT_MTI_LO_WATER_MARK 0x0918 -- GitLab From dc54e450a5dd0fd9147dc9fea0684293569d3609 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 10 Mar 2023 22:46:28 +0100 Subject: [PATCH 0424/2078] net: stmmac: qcom: drop of_match_ptr for ID table The driver is specific to ARCH_QCOM which depends on OF thus the driver is OF-only. Its of_device_id table is built unconditionally, thus of_match_ptr() for ID table does not make sense. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 732774645c1a6..32763566c2149 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -606,7 +606,7 @@ static struct platform_driver qcom_ethqos_driver = { .driver = { .name = "qcom-ethqos", .pm = &stmmac_pltfr_pm_ops, - .of_match_table = of_match_ptr(qcom_ethqos_match), + .of_match_table = qcom_ethqos_match, }, }; module_platform_driver(qcom_ethqos_driver); -- GitLab From e6512465838bf634d147399c8c96612d3cb419b9 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 10 Mar 2023 22:46:29 +0100 Subject: [PATCH 0425/2078] net: stmmac: generic: drop of_match_ptr for ID table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver can match only via the DT table so the table should be always used and the of_match_ptr does not have any sense (this also allows ACPI matching via PRP0001, even though it is not relevant here). drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c:72:34: error: ‘dwmac_generic_match’ defined but not used [-Werror=unused-const-variable=] Signed-off-by: Krzysztof Kozlowski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c index 5e731a72cce81..ef8f3a940938c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c @@ -91,7 +91,7 @@ static struct platform_driver dwmac_generic_driver = { .driver = { .name = STMMAC_RESOURCE_NAME, .pm = &stmmac_pltfr_pm_ops, - .of_match_table = of_match_ptr(dwmac_generic_match), + .of_match_table = dwmac_generic_match, }, }; module_platform_driver(dwmac_generic_driver); -- GitLab From 69df36d524dbb6f0692252c5f9d41bed9a064ff5 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 10 Mar 2023 22:46:30 +0100 Subject: [PATCH 0426/2078] net: marvell: pxa168_eth: drop of_match_ptr for ID table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver can match only via the DT table so the table should be always used and the of_match_ptr does not have any sense (this also allows ACPI matching via PRP0001, even though it is not relevant here). drivers/net/ethernet/marvell/pxa168_eth.c:1575:34: error: ‘pxa168_eth_of_match’ defined but not used [-Werror=unused-const-variable=] Signed-off-by: Krzysztof Kozlowski Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/marvell/pxa168_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c index 87fff539d39d5..d5691b6a2bc54 100644 --- a/drivers/net/ethernet/marvell/pxa168_eth.c +++ b/drivers/net/ethernet/marvell/pxa168_eth.c @@ -1586,7 +1586,7 @@ static struct platform_driver pxa168_eth_driver = { .suspend = pxa168_eth_suspend, .driver = { .name = DRIVER_NAME, - .of_match_table = of_match_ptr(pxa168_eth_of_match), + .of_match_table = pxa168_eth_of_match, }, }; -- GitLab From 7f319fe4363c613f8b00b4bfab66c3659c38378b Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 10 Mar 2023 22:46:31 +0100 Subject: [PATCH 0427/2078] net: samsung: sxgbe: drop of_match_ptr for ID table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver can match only via the DT table so the table should be always used and the of_match_ptr does not have any sense (this also allows ACPI matching via PRP0001, even though it is not relevant here). drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c:220:34: error: ‘sxgbe_dt_ids’ defined but not used [-Werror=unused-const-variable=] Signed-off-by: Krzysztof Kozlowski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c index 9265324666911..4e5526303f076 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c @@ -229,7 +229,7 @@ static struct platform_driver sxgbe_platform_driver = { .driver = { .name = SXGBE_RESOURCE_NAME, .pm = &sxgbe_platform_pm_ops, - .of_match_table = of_match_ptr(sxgbe_dt_ids), + .of_match_table = sxgbe_dt_ids, }, }; -- GitLab From 7e9aa8cad0844b85a6877cd072c95ac2844eb666 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 10 Mar 2023 22:46:32 +0100 Subject: [PATCH 0428/2078] net: ni: drop of_match_ptr for ID table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver can match only via the DT table so the table should be always used and the of_match_ptr does not have any sense (this also allows ACPI matching via PRP0001, even though it is not relevant here). drivers/net/ethernet/ni/nixge.c:1253:34: error: ‘nixge_dt_ids’ defined but not used [-Werror=unused-const-variable=] Signed-off-by: Krzysztof Kozlowski Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/ni/nixge.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c index 56e02cba0b8ac..0fd156286d4d7 100644 --- a/drivers/net/ethernet/ni/nixge.c +++ b/drivers/net/ethernet/ni/nixge.c @@ -1422,7 +1422,7 @@ static struct platform_driver nixge_driver = { .remove = nixge_remove, .driver = { .name = "nixge", - .of_match_table = of_match_ptr(nixge_dt_ids), + .of_match_table = nixge_dt_ids, }, }; module_platform_driver(nixge_driver); -- GitLab From a52ed50a04de179cc866c18bdd1b9a2f59d6bdfc Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 11 Mar 2023 12:13:28 +0100 Subject: [PATCH 0429/2078] nfc: trf7970a: mark OF related data as maybe unused MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver can be compile tested with !CONFIG_OF making certain data unused: drivers/nfc/trf7970a.c:2232:34: error: ‘trf7970a_of_match’ defined but not used [-Werror=unused-const-variable=] Signed-off-by: Krzysztof Kozlowski Reviewed-by: Simon Horman Reviewed-by: Mark Greer Signed-off-by: David S. Miller --- drivers/nfc/trf7970a.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/trf7970a.c b/drivers/nfc/trf7970a.c index 21d68664fe082..7eb17f46a8153 100644 --- a/drivers/nfc/trf7970a.c +++ b/drivers/nfc/trf7970a.c @@ -2229,7 +2229,7 @@ static const struct dev_pm_ops trf7970a_pm_ops = { trf7970a_pm_runtime_resume, NULL) }; -static const struct of_device_id trf7970a_of_match[] = { +static const struct of_device_id trf7970a_of_match[] __maybe_unused = { {.compatible = "ti,trf7970a",}, {}, }; -- GitLab From 6ea1e67788f30710d1991de42802cbdeb67afec4 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 11 Mar 2023 18:32:52 +0100 Subject: [PATCH 0430/2078] net: dsa: lantiq_gswip: mark OF related data as maybe unused MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver can be compile tested with !CONFIG_OF making certain data unused: drivers/net/dsa/lantiq_gswip.c:1888:34: error: ‘xway_gphy_match’ defined but not used [-Werror=unused-const-variable=] Signed-off-by: Krzysztof Kozlowski Signed-off-by: David S. 
Miller --- drivers/net/dsa/lantiq_gswip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c index 05ecaa007ab18..3c76a1a14aee3 100644 --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq_gswip.c @@ -1885,7 +1885,7 @@ static const struct xway_gphy_match_data xrx300_gphy_data = { .ge_firmware_name = "lantiq/xrx300_phy11g_a21.bin", }; -static const struct of_device_id xway_gphy_match[] = { +static const struct of_device_id xway_gphy_match[] __maybe_unused = { { .compatible = "lantiq,xrx200-gphy-fw", .data = NULL }, { .compatible = "lantiq,xrx200a1x-gphy-fw", .data = &xrx200a1x_gphy_data }, { .compatible = "lantiq,xrx200a2x-gphy-fw", .data = &xrx200a2x_gphy_data }, -- GitLab From ced5c5a0a2ea5eec6ed6cf1fcdde719fdafed82c Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 11 Mar 2023 18:32:53 +0100 Subject: [PATCH 0431/2078] net: dsa: lan9303: drop of_match_ptr for ID table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver will match mostly or only by DT table (even thought there is regular ID table) so there is little benefit in of_match_ptr (this also allows ACPI matching via PRP0001, even though it might not be relevant here). drivers/net/dsa/lan9303_i2c.c:97:34: error: ‘lan9303_i2c_of_match’ defined but not used [-Werror=unused-const-variable=] drivers/net/dsa/lan9303_mdio.c:157:34: error: ‘lan9303_mdio_of_match’ defined but not used [-Werror=unused-const-variable=] Signed-off-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- drivers/net/dsa/lan9303_i2c.c | 2 +- drivers/net/dsa/lan9303_mdio.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/lan9303_i2c.c b/drivers/net/dsa/lan9303_i2c.c index 1cb41c36bd478..e8844820c3a93 100644 --- a/drivers/net/dsa/lan9303_i2c.c +++ b/drivers/net/dsa/lan9303_i2c.c @@ -103,7 +103,7 @@ MODULE_DEVICE_TABLE(of, lan9303_i2c_of_match); static struct i2c_driver lan9303_i2c_driver = { .driver = { .name = "LAN9303_I2C", - .of_match_table = of_match_ptr(lan9303_i2c_of_match), + .of_match_table = lan9303_i2c_of_match, }, .probe_new = lan9303_i2c_probe, .remove = lan9303_i2c_remove, diff --git a/drivers/net/dsa/lan9303_mdio.c b/drivers/net/dsa/lan9303_mdio.c index 4f33369a2de52..d8ab2b77d201e 100644 --- a/drivers/net/dsa/lan9303_mdio.c +++ b/drivers/net/dsa/lan9303_mdio.c @@ -164,7 +164,7 @@ MODULE_DEVICE_TABLE(of, lan9303_mdio_of_match); static struct mdio_driver lan9303_mdio_driver = { .mdiodrv.driver = { .name = "LAN9303_MDIO", - .of_match_table = of_match_ptr(lan9303_mdio_of_match), + .of_match_table = lan9303_mdio_of_match, }, .probe = lan9303_mdio_probe, .remove = lan9303_mdio_remove, -- GitLab From 1eb8566dd08db9c7402a26b027086eaabc20ff2a Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 11 Mar 2023 18:32:54 +0100 Subject: [PATCH 0432/2078] net: dsa: seville_vsc9953: drop of_match_ptr for ID table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver can match only via the DT table so the table should be always used and the of_match_ptr does not have any sense (this also allows ACPI matching via PRP0001, even though it might not be relevant here). drivers/net/dsa/ocelot/seville_vsc9953.c:1070:34: error: ‘seville_of_match’ defined but not used [-Werror=unused-const-variable=] Signed-off-by: Krzysztof Kozlowski Signed-off-by: David S. 
Miller --- drivers/net/dsa/ocelot/seville_vsc9953.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/ocelot/seville_vsc9953.c b/drivers/net/dsa/ocelot/seville_vsc9953.c index 563ad338da25b..96d4972a62f0f 100644 --- a/drivers/net/dsa/ocelot/seville_vsc9953.c +++ b/drivers/net/dsa/ocelot/seville_vsc9953.c @@ -1079,7 +1079,7 @@ static struct platform_driver seville_vsc9953_driver = { .shutdown = seville_shutdown, .driver = { .name = "mscc_seville", - .of_match_table = of_match_ptr(seville_of_match), + .of_match_table = seville_of_match, }, }; module_platform_driver(seville_vsc9953_driver); -- GitLab From 00923ff2e1baf6bc9832f12e44f73439de5fcd72 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 11 Mar 2023 18:32:55 +0100 Subject: [PATCH 0433/2078] net: dsa: ksz9477: drop of_match_ptr for ID table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver will match mostly by DT table (even thought there is regular ID table) so there is little benefit in of_match_ptr (this also allows ACPI matching via PRP0001, even though it might not be relevant here). drivers/net/dsa/microchip/ksz9477_i2c.c:84:34: error: ‘ksz9477_dt_ids’ defined but not used [-Werror=unused-const-variable=] Signed-off-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- drivers/net/dsa/microchip/ksz9477_i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/microchip/ksz9477_i2c.c b/drivers/net/dsa/microchip/ksz9477_i2c.c index e315f669ec061..97a317263a2f4 100644 --- a/drivers/net/dsa/microchip/ksz9477_i2c.c +++ b/drivers/net/dsa/microchip/ksz9477_i2c.c @@ -117,7 +117,7 @@ MODULE_DEVICE_TABLE(of, ksz9477_dt_ids); static struct i2c_driver ksz9477_i2c_driver = { .driver = { .name = "ksz9477-switch", - .of_match_table = of_match_ptr(ksz9477_dt_ids), + .of_match_table = ksz9477_dt_ids, }, .probe_new = ksz9477_i2c_probe, .remove = ksz9477_i2c_remove, -- GitLab From 0f17b42827ae63aaf08cad875d9575b1bca1e066 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 11 Mar 2023 18:32:56 +0100 Subject: [PATCH 0434/2078] net: dsa: ocelot: drop of_match_ptr for ID table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver can match only via the DT table so the table should be always used and the of_match_ptr does not have any sense (this also allows ACPI matching via PRP0001, even though it might not be relevant here). drivers/net/dsa/ocelot/ocelot_ext.c:143:34: error: ‘ocelot_ext_switch_of_match’ defined but not used [-Werror=unused-const-variable=] Signed-off-by: Krzysztof Kozlowski Acked-by: Colin Foster Signed-off-by: David S. 
Miller --- drivers/net/dsa/ocelot/ocelot_ext.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/ocelot/ocelot_ext.c b/drivers/net/dsa/ocelot/ocelot_ext.c index 063150659816a..228737a32080b 100644 --- a/drivers/net/dsa/ocelot/ocelot_ext.c +++ b/drivers/net/dsa/ocelot/ocelot_ext.c @@ -149,7 +149,7 @@ MODULE_DEVICE_TABLE(of, ocelot_ext_switch_of_match); static struct platform_driver ocelot_ext_switch_driver = { .driver = { .name = "ocelot-ext-switch", - .of_match_table = of_match_ptr(ocelot_ext_switch_of_match), + .of_match_table = ocelot_ext_switch_of_match, }, .probe = ocelot_ext_probe, .remove = ocelot_ext_remove, -- GitLab From b0b7d1b6260b3acb9f3f3bc7f3ca88a8898e95b1 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 11 Mar 2023 18:32:57 +0100 Subject: [PATCH 0435/2078] net: phy: ks8995: drop of_match_ptr for ID table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver will match mostly by DT table (even thought there is regular ID table) so there is little benefit in of_match_ptr (this also allows ACPI matching via PRP0001, even though it might not be relevant here). drivers/net/phy/spi_ks8995.c:156:34: error: ‘ks8895_spi_of_match’ defined but not used [-Werror=unused-const-variable=] Signed-off-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- drivers/net/phy/spi_ks8995.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c index d4202d40d47aa..7196e927c2cd8 100644 --- a/drivers/net/phy/spi_ks8995.c +++ b/drivers/net/phy/spi_ks8995.c @@ -491,7 +491,7 @@ static void ks8995_remove(struct spi_device *spi) static struct spi_driver ks8995_driver = { .driver = { .name = "spi-ks8995", - .of_match_table = of_match_ptr(ks8895_spi_of_match), + .of_match_table = ks8895_spi_of_match, }, .probe = ks8995_probe, .remove = ks8995_remove, -- GitLab From 3df09beef650af667be5ec7eee88629e6e479a51 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 11 Mar 2023 18:32:58 +0100 Subject: [PATCH 0436/2078] net: ieee802154: adf7242: drop of_match_ptr for ID table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver will match mostly by DT table (even thought there is regular ID table) so there is little benefit in of_match_ptr (this also allows ACPI matching via PRP0001, even though it might not be relevant here). drivers/net/ieee802154/adf7242.c:1322:34: error: ‘adf7242_of_match’ defined but not used [-Werror=unused-const-variable=] Signed-off-by: Krzysztof Kozlowski Acked-by: Michael Hennerich Signed-off-by: David S. 
Miller --- drivers/net/ieee802154/adf7242.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/adf7242.c b/drivers/net/ieee802154/adf7242.c index 5cf218c674a5a..509acc86001cd 100644 --- a/drivers/net/ieee802154/adf7242.c +++ b/drivers/net/ieee802154/adf7242.c @@ -1336,7 +1336,7 @@ MODULE_DEVICE_TABLE(spi, adf7242_device_id); static struct spi_driver adf7242_driver = { .id_table = adf7242_device_id, .driver = { - .of_match_table = of_match_ptr(adf7242_of_match), + .of_match_table = adf7242_of_match, .name = "adf7242", .owner = THIS_MODULE, }, -- GitLab From 3896c40b7824c8874963e257afc0f464200d2d2c Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 11 Mar 2023 18:32:59 +0100 Subject: [PATCH 0437/2078] net: ieee802154: mcr20a: drop of_match_ptr for ID table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver will match mostly by DT table (even thought there is regular ID table) so there is little benefit in of_match_ptr (this also allows ACPI matching via PRP0001, even though it might not be relevant here). drivers/net/ieee802154/mcr20a.c:1340:34: error: ‘mcr20a_of_match’ defined but not used [-Werror=unused-const-variable=] Signed-off-by: Krzysztof Kozlowski Acked-by: Stefan Schmidt Signed-off-by: David S. Miller --- drivers/net/ieee802154/mcr20a.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c index f53d185e0568d..87abe3b46316e 100644 --- a/drivers/net/ieee802154/mcr20a.c +++ b/drivers/net/ieee802154/mcr20a.c @@ -1352,7 +1352,7 @@ MODULE_DEVICE_TABLE(spi, mcr20a_device_id); static struct spi_driver mcr20a_driver = { .id_table = mcr20a_device_id, .driver = { - .of_match_table = of_match_ptr(mcr20a_of_match), + .of_match_table = mcr20a_of_match, .name = "mcr20a", }, .probe = mcr20a_probe, -- GitLab From 32b7030681a488186bf1753b29a170f5d4644721 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 11 Mar 2023 18:33:00 +0100 Subject: [PATCH 0438/2078] net: ieee802154: at86rf230: drop of_match_ptr for ID table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver will match mostly by DT table (even thought there is regular ID table) so there is little benefit in of_match_ptr (this also allows ACPI matching via PRP0001, even though it might not be relevant here). drivers/net/ieee802154/at86rf230.c:1644:34: error: ‘at86rf230_of_match’ defined but not used [-Werror=unused-const-variable=] Signed-off-by: Krzysztof Kozlowski Acked-by: Stefan Schmidt Signed-off-by: David S. 
Miller --- drivers/net/ieee802154/at86rf230.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index 62b984f84d9f9..164c7f605af5e 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -1662,7 +1662,7 @@ MODULE_DEVICE_TABLE(spi, at86rf230_device_id); static struct spi_driver at86rf230_driver = { .id_table = at86rf230_device_id, .driver = { - .of_match_table = of_match_ptr(at86rf230_of_match), + .of_match_table = at86rf230_of_match, .name = "at86rf230", }, .probe = at86rf230_probe, -- GitLab From cdfe4fc4d946e2a5d589f73985043674cf298ff6 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 11 Mar 2023 18:33:01 +0100 Subject: [PATCH 0439/2078] net: ieee802154: ca8210: drop of_match_ptr for ID table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver can match only via the DT table so the table should be always used and the of_match_ptr does not have any sense (this also allows ACPI matching via PRP0001, even though it might not be relevant here). drivers/net/ieee802154/ca8210.c:3174:34: error: ‘ca8210_of_ids’ defined but not used [-Werror=unused-const-variable=] Signed-off-by: Krzysztof Kozlowski Acked-by: Stefan Schmidt Signed-off-by: David S. Miller --- drivers/net/ieee802154/ca8210.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index 0b0c6c0764fe9..b5def77e37d76 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -3180,7 +3180,7 @@ static struct spi_driver ca8210_spi_driver = { .driver = { .name = DRIVER_NAME, .owner = THIS_MODULE, - .of_match_table = of_match_ptr(ca8210_of_ids), + .of_match_table = ca8210_of_ids, }, .probe = ca8210_probe, .remove = ca8210_remove -- GitLab From 059fa99723405b47833c7e14e062c7ba7066ca3c Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 11 Mar 2023 18:33:02 +0100 Subject: [PATCH 0440/2078] net: ieee802154: adf7242: drop owner from driver Core already sets owner in spi_driver. Signed-off-by: Krzysztof Kozlowski Acked-by: Michael Hennerich Signed-off-by: David S. Miller --- drivers/net/ieee802154/adf7242.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ieee802154/adf7242.c b/drivers/net/ieee802154/adf7242.c index 509acc86001cd..f9972b8140f9f 100644 --- a/drivers/net/ieee802154/adf7242.c +++ b/drivers/net/ieee802154/adf7242.c @@ -1338,7 +1338,6 @@ static struct spi_driver adf7242_driver = { .driver = { .of_match_table = adf7242_of_match, .name = "adf7242", - .owner = THIS_MODULE, }, .probe = adf7242_probe, .remove = adf7242_remove, -- GitLab From 613a3c44a3737c9a2dfd9f70f9b79427459bb745 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 11 Mar 2023 18:33:03 +0100 Subject: [PATCH 0441/2078] net: ieee802154: ca8210: drop owner from driver Core already sets owner in spi_driver. Signed-off-by: Krzysztof Kozlowski Acked-by: Stefan Schmidt Signed-off-by: David S. 
Miller --- drivers/net/ieee802154/ca8210.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index b5def77e37d76..1eada1db8dcf7 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -3179,7 +3179,6 @@ MODULE_DEVICE_TABLE(of, ca8210_of_ids); static struct spi_driver ca8210_spi_driver = { .driver = { .name = DRIVER_NAME, - .owner = THIS_MODULE, .of_match_table = ca8210_of_ids, }, .probe = ca8210_probe, -- GitLab From 45ef71d108e6f1545e2981d8af03be13f2e39411 Mon Sep 17 00:00:00 2001 From: Josef Miegl Date: Sun, 12 Mar 2023 17:45:57 +0100 Subject: [PATCH 0442/2078] net: geneve: set IFF_POINTOPOINT with IFLA_GENEVE_INNER_PROTO_INHERIT The GENEVE tunnel used with IFLA_GENEVE_INNER_PROTO_INHERIT is point-to-point, so set IFF_POINTOPOINT to reflect that. Signed-off-by: Josef Miegl Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/geneve.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 89ff7f8e8c7e1..5c3c7169c0a28 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1426,7 +1426,7 @@ static int geneve_configure(struct net *net, struct net_device *dev, dev->type = ARPHRD_NONE; dev->hard_header_len = 0; dev->addr_len = 0; - dev->flags = IFF_NOARP; + dev->flags = IFF_POINTOPOINT | IFF_NOARP; } err = register_netdevice(dev); -- GitLab From f947568e258038d3c2f8f38a9a7dabaca36643ec Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Mon, 13 Mar 2023 11:10:31 +0100 Subject: [PATCH 0443/2078] net/smc: Introduce explicit check for v2 support Previously, v2 support was derived from a very specific format of the SEID as part of the SMC-D codebase. Make this part of the SMC-D device API, so implementers do not need to adhere to a specific SEID format. Signed-off-by: Stefan Raspl Reviewed-and-tested-by: Jan Karcher Reviewed-by: Wenjia Zhang Signed-off-by: Wenjia Zhang Reviewed-by: Tony Lu Signed-off-by: David S. 
Miller --- drivers/s390/net/ism_drv.c | 7 +++++++ include/net/smc.h | 1 + net/smc/smc_ism.c | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c index eb7e134860878..1c73d32966f19 100644 --- a/drivers/s390/net/ism_drv.c +++ b/drivers/s390/net/ism_drv.c @@ -842,6 +842,12 @@ static int smcd_move(struct smcd_dev *smcd, u64 dmb_tok, unsigned int idx, return ism_move(smcd->priv, dmb_tok, idx, sf, offset, data, size); } +static int smcd_supports_v2(void) +{ + return SYSTEM_EID.serial_number[0] != '0' || + SYSTEM_EID.type[0] != '0'; +} + static u64 smcd_get_local_gid(struct smcd_dev *smcd) { return ism_get_local_gid(smcd->priv); @@ -869,6 +875,7 @@ static const struct smcd_ops ism_ops = { .reset_vlan_required = smcd_reset_vlan_required, .signal_event = smcd_signal_ieq, .move_data = smcd_move, + .supports_v2 = smcd_supports_v2, .get_system_eid = ism_get_seid, .get_local_gid = smcd_get_local_gid, .get_chid = smcd_get_chid, diff --git a/include/net/smc.h b/include/net/smc.h index 597cb93811827..a002552be29c3 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -67,6 +67,7 @@ struct smcd_ops { int (*move_data)(struct smcd_dev *dev, u64 dmb_tok, unsigned int idx, bool sf, unsigned int offset, void *data, unsigned int size); + int (*supports_v2)(void); u8* (*get_system_eid)(void); u64 (*get_local_gid)(struct smcd_dev *dev); u16 (*get_chid)(struct smcd_dev *dev); diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index 3b0b7710c6b0f..fbee2493091f1 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -429,7 +429,7 @@ static void smcd_register_dev(struct ism_dev *ism) u8 *system_eid = NULL; system_eid = smcd->ops->get_system_eid(); - if (system_eid[24] != '0' || system_eid[28] != '0') { + if (smcd->ops->supports_v2()) { smc_ism_v2_capable = true; memcpy(smc_ism_v2_system_eid, system_eid, SMC_MAX_EID_LEN); -- GitLab From 298c91dc40e51395198533433716d0521ed10995 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Mon, 13 Mar 2023 11:10:32 +0100 Subject: [PATCH 0444/2078] net/ism: Remove extra include Signed-off-by: Stefan Raspl Signed-off-by: Wenjia Zhang Reviewed-by: Tony Lu Signed-off-by: David S. Miller --- drivers/s390/net/ism_drv.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c index 1c73d32966f19..05749c8779906 100644 --- a/drivers/s390/net/ism_drv.c +++ b/drivers/s390/net/ism_drv.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include -- GitLab From a02d83f9947d8f71904eda4de046630c3eb6802c Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Mon, 13 Mar 2023 12:32:11 +0100 Subject: [PATCH 0445/2078] scm: fix MSG_CTRUNC setting condition for SO_PASSSEC Currently, kernel would set MSG_CTRUNC flag if msg_control buffer wasn't provided and SO_PASSCRED was set or if there was pending SCM_RIGHTS. For some reason we have no corresponding check for SO_PASSSEC. In the recvmsg(2) doc we have: MSG_CTRUNC indicates that some control data was discarded due to lack of space in the buffer for ancillary data. So, we need to set MSG_CTRUNC flag for all types of SCM. This change can break applications those don't check MSG_CTRUNC flag. Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Leon Romanovsky Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Alexander Mikhalitsyn v2: - commit message was rewritten according to Eric's suggestion Acked-by: Paul Moore Signed-off-by: David S. 
Miller --- include/net/scm.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/include/net/scm.h b/include/net/scm.h index 1ce365f4c2560..585adc1346bd0 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -105,16 +105,27 @@ static inline void scm_passec(struct socket *sock, struct msghdr *msg, struct sc } } } + +static inline bool scm_has_secdata(struct socket *sock) +{ + return test_bit(SOCK_PASSSEC, &sock->flags); +} #else static inline void scm_passec(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm) { } + +static inline bool scm_has_secdata(struct socket *sock) +{ + return false; +} #endif /* CONFIG_SECURITY_NETWORK */ static __inline__ void scm_recv(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm, int flags) { if (!msg->msg_control) { - if (test_bit(SOCK_PASSCRED, &sock->flags) || scm->fp) + if (test_bit(SOCK_PASSCRED, &sock->flags) || scm->fp || + scm_has_secdata(sock)) msg->msg_flags |= MSG_CTRUNC; scm_destroy(scm); return; -- GitLab From 3b50d9a174575473cc5ed8eba3570e02ea3d0733 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Tue, 14 Mar 2023 05:18:48 +0100 Subject: [PATCH 0446/2078] MAINTAINERS: adjust file entries after wifi driver movement Commit f79cbc77abde ("wifi: move mac80211_hwsim and virt_wifi to virtual directory") and commit 298e50ad8eb8 ("wifi: move raycs, wl3501 and rndis_wlan to legacy directory") move remaining wireless drivers into subdirectories, but does not adjust the entries in MAINTAINERS. Hence, ./scripts/get_maintainer.pl --self-test=patterns complains about broken references. Repair these file references in those wireless driver sections. Signed-off-by: Lukas Bulwahn Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230314041848.5120-1-lukas.bulwahn@gmail.com --- MAINTAINERS | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8d5bc223f3053..0dab503c88ac2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12275,7 +12275,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless-next.git F: Documentation/networking/mac80211-injection.rst F: Documentation/networking/mac80211_hwsim/mac80211_hwsim.rst -F: drivers/net/wireless/mac80211_hwsim.[ch] +F: drivers/net/wireless/virtual/mac80211_hwsim.[ch] F: include/net/mac80211.h F: net/mac80211/ @@ -17542,7 +17542,7 @@ F: include/ras/ras_event.h RAYLINK/WEBGEAR 802.11 WIRELESS LAN DRIVER L: linux-wireless@vger.kernel.org S: Orphan -F: drivers/net/wireless/ray* +F: drivers/net/wireless/legacy/ray* RC-CORE / LIRC FRAMEWORK M: Sean Young @@ -21797,7 +21797,7 @@ USB WIRELESS RNDIS DRIVER (rndis_wlan) M: Jussi Kivilinna L: linux-wireless@vger.kernel.org S: Maintained -F: drivers/net/wireless/rndis_wlan.c +F: drivers/net/wireless/legacy/rndis_wlan.c USB XHCI DRIVER M: Mathias Nyman @@ -22551,7 +22551,7 @@ F: drivers/input/misc/wistron_btns.c WL3501 WIRELESS PCMCIA CARD DRIVER L: linux-wireless@vger.kernel.org S: Odd fixes -F: drivers/net/wireless/wl3501* +F: drivers/net/wireless/legacy/wl3501* WOLFSON MICROELECTRONICS DRIVERS L: patches@opensource.cirrus.com -- GitLab From f94d7a3a51070d185e9203b53a3d669e27931e3d Mon Sep 17 00:00:00 2001 From: Bastian Germann Date: Mon, 6 Mar 2023 13:50:40 +0100 Subject: [PATCH 0447/2078] wifi: ath9k: Remove Qwest/Actiontec 802AIN ID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The USB device 1668:1200 is Qwest/Actiontec 802AIN 
which is also correctly claimed to be supported by carl9170. Supposedly, the successor 802AIN2 has an ath9k compatible chip whose USB ID (unknown) could be inserted instead. Drop the ID from the wrong driver. I happened to find this by chance while packaging the ath9k and carl9170 firmware for Debian. Signed-off-by: Bastian Germann Acked-by: Toke Høiland-Jørgensen Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230306125041.2221-1-bage@debian.org --- drivers/net/wireless/ath/ath9k/hif_usb.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/hif_usb.c b/drivers/net/wireless/ath/ath9k/hif_usb.c index e0130beb304df..27ff1ca2631fc 100644 --- a/drivers/net/wireless/ath/ath9k/hif_usb.c +++ b/drivers/net/wireless/ath/ath9k/hif_usb.c @@ -42,8 +42,6 @@ static const struct usb_device_id ath9k_hif_usb_ids[] = { { USB_DEVICE(0x0cf3, 0x7015), .driver_info = AR9287_USB }, /* Atheros */ - { USB_DEVICE(0x1668, 0x1200), - .driver_info = AR9287_USB }, /* Verizon */ { USB_DEVICE(0x0cf3, 0x7010), .driver_info = AR9280_USB }, /* Atheros */ -- GitLab From f94557154d9fc77c392844523388edd4661a27a3 Mon Sep 17 00:00:00 2001 From: Vladimir Lypak Date: Sat, 11 Mar 2023 20:36:47 +0530 Subject: [PATCH 0448/2078] wifi: wcn36xx: add support for pronto-v3 Pronto v3 has a different DXE address than prior Pronto versions. This patch changes the macro to return the correct register address based on the pronto version. Signed-off-by: Vladimir Lypak Signed-off-by: Sireesh Kodali Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230311150647.22935-2-sireeshkodali1@gmail.com --- drivers/net/wireless/ath/wcn36xx/dxe.c | 23 +++++++++++----------- drivers/net/wireless/ath/wcn36xx/dxe.h | 4 ++-- drivers/net/wireless/ath/wcn36xx/main.c | 1 + drivers/net/wireless/ath/wcn36xx/wcn36xx.h | 1 + 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/ath/wcn36xx/dxe.c b/drivers/net/wireless/ath/wcn36xx/dxe.c index 4e9e13941c8f4..9013f056eecb4 100644 --- a/drivers/net/wireless/ath/wcn36xx/dxe.c +++ b/drivers/net/wireless/ath/wcn36xx/dxe.c @@ -112,8 +112,8 @@ int wcn36xx_dxe_alloc_ctl_blks(struct wcn36xx *wcn) wcn->dxe_rx_l_ch.desc_num = WCN36XX_DXE_CH_DESC_NUMB_RX_L; wcn->dxe_rx_h_ch.desc_num = WCN36XX_DXE_CH_DESC_NUMB_RX_H; - wcn->dxe_tx_l_ch.dxe_wq = WCN36XX_DXE_WQ_TX_L; - wcn->dxe_tx_h_ch.dxe_wq = WCN36XX_DXE_WQ_TX_H; + wcn->dxe_tx_l_ch.dxe_wq = WCN36XX_DXE_WQ_TX_L(wcn); + wcn->dxe_tx_h_ch.dxe_wq = WCN36XX_DXE_WQ_TX_H(wcn); wcn->dxe_tx_l_ch.ctrl_bd = WCN36XX_DXE_CTRL_TX_L_BD; wcn->dxe_tx_h_ch.ctrl_bd = WCN36XX_DXE_CTRL_TX_H_BD; @@ -165,8 +165,9 @@ void wcn36xx_dxe_free_ctl_blks(struct wcn36xx *wcn) wcn36xx_dxe_free_ctl_block(&wcn->dxe_rx_h_ch); } -static int wcn36xx_dxe_init_descs(struct device *dev, struct wcn36xx_dxe_ch *wcn_ch) +static int wcn36xx_dxe_init_descs(struct wcn36xx *wcn, struct wcn36xx_dxe_ch *wcn_ch) { + struct device *dev = wcn->dev; struct wcn36xx_dxe_desc *cur_dxe = NULL; struct wcn36xx_dxe_desc *prev_dxe = NULL; struct wcn36xx_dxe_ctl *cur_ctl = NULL; @@ -190,11 +191,11 @@ static int wcn36xx_dxe_init_descs(struct device *dev, struct wcn36xx_dxe_ch *wcn switch (wcn_ch->ch_type) { case WCN36XX_DXE_CH_TX_L: cur_dxe->ctrl = WCN36XX_DXE_CTRL_TX_L; - cur_dxe->dst_addr_l = WCN36XX_DXE_WQ_TX_L; + cur_dxe->dst_addr_l = WCN36XX_DXE_WQ_TX_L(wcn); break; case WCN36XX_DXE_CH_TX_H: cur_dxe->ctrl = WCN36XX_DXE_CTRL_TX_H; - cur_dxe->dst_addr_l = WCN36XX_DXE_WQ_TX_H; + cur_dxe->dst_addr_l = WCN36XX_DXE_WQ_TX_H(wcn); break; case WCN36XX_DXE_CH_RX_L: 
cur_dxe->ctrl = WCN36XX_DXE_CTRL_RX_L; @@ -914,7 +915,7 @@ int wcn36xx_dxe_init(struct wcn36xx *wcn) /***************************************/ /* Init descriptors for TX LOW channel */ /***************************************/ - ret = wcn36xx_dxe_init_descs(wcn->dev, &wcn->dxe_tx_l_ch); + ret = wcn36xx_dxe_init_descs(wcn, &wcn->dxe_tx_l_ch); if (ret) { dev_err(wcn->dev, "Error allocating descriptor\n"); return ret; @@ -928,14 +929,14 @@ int wcn36xx_dxe_init(struct wcn36xx *wcn) /* Program DMA destination addr for TX LOW */ wcn36xx_dxe_write_register(wcn, WCN36XX_DXE_CH_DEST_ADDR_TX_L, - WCN36XX_DXE_WQ_TX_L); + WCN36XX_DXE_WQ_TX_L(wcn)); wcn36xx_dxe_read_register(wcn, WCN36XX_DXE_REG_CH_EN, ®_data); /***************************************/ /* Init descriptors for TX HIGH channel */ /***************************************/ - ret = wcn36xx_dxe_init_descs(wcn->dev, &wcn->dxe_tx_h_ch); + ret = wcn36xx_dxe_init_descs(wcn, &wcn->dxe_tx_h_ch); if (ret) { dev_err(wcn->dev, "Error allocating descriptor\n"); goto out_err_txh_ch; @@ -950,14 +951,14 @@ int wcn36xx_dxe_init(struct wcn36xx *wcn) /* Program DMA destination addr for TX HIGH */ wcn36xx_dxe_write_register(wcn, WCN36XX_DXE_CH_DEST_ADDR_TX_H, - WCN36XX_DXE_WQ_TX_H); + WCN36XX_DXE_WQ_TX_H(wcn)); wcn36xx_dxe_read_register(wcn, WCN36XX_DXE_REG_CH_EN, ®_data); /***************************************/ /* Init descriptors for RX LOW channel */ /***************************************/ - ret = wcn36xx_dxe_init_descs(wcn->dev, &wcn->dxe_rx_l_ch); + ret = wcn36xx_dxe_init_descs(wcn, &wcn->dxe_rx_l_ch); if (ret) { dev_err(wcn->dev, "Error allocating descriptor\n"); goto out_err_rxl_ch; @@ -988,7 +989,7 @@ int wcn36xx_dxe_init(struct wcn36xx *wcn) /***************************************/ /* Init descriptors for RX HIGH channel */ /***************************************/ - ret = wcn36xx_dxe_init_descs(wcn->dev, &wcn->dxe_rx_h_ch); + ret = wcn36xx_dxe_init_descs(wcn, &wcn->dxe_rx_h_ch); if (ret) { dev_err(wcn->dev, "Error allocating descriptor\n"); goto out_err_rxh_ch; diff --git a/drivers/net/wireless/ath/wcn36xx/dxe.h b/drivers/net/wireless/ath/wcn36xx/dxe.h index 26a31edf52e99..dd8c684a3ba75 100644 --- a/drivers/net/wireless/ath/wcn36xx/dxe.h +++ b/drivers/net/wireless/ath/wcn36xx/dxe.h @@ -135,8 +135,8 @@ H2H_TEST_RX_TX = DMA2 WCN36xx_DXE_CTRL_ENDIANNESS) /* TODO This must calculated properly but not hardcoded */ -#define WCN36XX_DXE_WQ_TX_L 0x17 -#define WCN36XX_DXE_WQ_TX_H 0x17 +#define WCN36XX_DXE_WQ_TX_L(wcn) ((wcn)->is_pronto_v3 ? 0x6 : 0x17) +#define WCN36XX_DXE_WQ_TX_H(wcn) ((wcn)->is_pronto_v3 ? 
0x6 : 0x17) #define WCN36XX_DXE_WQ_RX_L 0xB #define WCN36XX_DXE_WQ_RX_H 0x4 diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c index 3b79cc1c7c5b0..8dbd115a393c7 100644 --- a/drivers/net/wireless/ath/wcn36xx/main.c +++ b/drivers/net/wireless/ath/wcn36xx/main.c @@ -1508,6 +1508,7 @@ static int wcn36xx_platform_get_resources(struct wcn36xx *wcn, } wcn->is_pronto = !!of_device_is_compatible(mmio_node, "qcom,pronto"); + wcn->is_pronto_v3 = !!of_device_is_compatible(mmio_node, "qcom,pronto-v3-pil"); /* Map the CCU memory */ index = of_property_match_string(mmio_node, "reg-names", "ccu"); diff --git a/drivers/net/wireless/ath/wcn36xx/wcn36xx.h b/drivers/net/wireless/ath/wcn36xx/wcn36xx.h index 9aa08b636d087..ff4a8e5d72091 100644 --- a/drivers/net/wireless/ath/wcn36xx/wcn36xx.h +++ b/drivers/net/wireless/ath/wcn36xx/wcn36xx.h @@ -217,6 +217,7 @@ struct wcn36xx { u8 fw_major; u32 fw_feat_caps[WCN36XX_HAL_CAPS_SIZE]; bool is_pronto; + bool is_pronto_v3; /* extra byte for the NULL termination */ u8 crm_version[WCN36XX_HAL_VERSION_LENGTH + 1]; -- GitLab From 488d9a484f96eee4f0e8e108aed42a057a1c7295 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Tue, 7 Mar 2023 02:47:06 -0800 Subject: [PATCH 0449/2078] wifi: ath12k: Add missing unwind goto in ath12k_pci_probe() Smatch Warns: drivers/net/wireless/ath/ath12k/pci.c:1198 ath12k_pci_probe() warn: missing unwind goto? Store the error value in ret and use correct label with a goto. Only Compile tested, found with Smatch. Fixes: d889913205cf ("wifi: ath12k: driver for Qualcomm Wi-Fi 7 devices") Reported-by: Dan Carpenter Link: https://lore.kernel.org/all/Y+426q6cfkEdb5Bv@kili/ Suggested-by: Dan Carpenter Signed-off-by: Harshit Mogalapalli Reviewed-by: Simon Horman Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230307104706.240119-1-harshit.m.mogalapalli@oracle.com --- drivers/net/wireless/ath/ath12k/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/pci.c b/drivers/net/wireless/ath/ath12k/pci.c index d32637b0113db..00b0080dbac38 100644 --- a/drivers/net/wireless/ath/ath12k/pci.c +++ b/drivers/net/wireless/ath/ath12k/pci.c @@ -1223,7 +1223,8 @@ static int ath12k_pci_probe(struct pci_dev *pdev, dev_err(&pdev->dev, "Unknown hardware version found for QCN9274: 0x%x\n", soc_hw_version_major); - return -EOPNOTSUPP; + ret = -EOPNOTSUPP; + goto err_pci_free_region; } break; case WCN7850_DEVICE_ID: -- GitLab From 3b1088a09ec9438523c251d8435e78988824bc0d Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 7 Mar 2023 16:22:39 -0600 Subject: [PATCH 0450/2078] wifi: ath11k: Replace fake flex-array with flexible-array member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Zero-length arrays as fake flexible arrays are deprecated and we are moving towards adopting C99 flexible-array members instead. Address 25 of the following warnings found with GCC-13 and -fstrict-flex-arrays=3 enabled: drivers/net/wireless/ath/ath11k/debugfs_htt_stats.c:30:51: warning: array subscript is outside array bounds of ‘const u32[0]’ {aka ‘const unsigned int[]’} [-Warray-bounds=] This helps with the ongoing efforts to tighten the FORTIFY_SOURCE routines on memcpy() and help us make progress towards globally enabling -fstrict-flex-arrays=3 [1]. 
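As a minimal illustration of the conversion pattern applied in the diff below (the struct and member names here are hypothetical, not taken from the ath11k headers): a trailing zero-length array that is the sole member of a struct becomes a C99 flexible-array member wrapped in DECLARE_FLEX_ARRAY(), because ISO C does not allow a bare flexible member to be the only member of a struct.

#include <linux/stddef.h>	/* DECLARE_FLEX_ARRAY() */
#include <linux/types.h>

/* Before: zero-length array used as a fake flexible array. */
struct example_stats_tlv_old {
	u32 data[0];
};

/* After: proper C99 flexible-array member; DECLARE_FLEX_ARRAY() is
 * required here because the array is the struct's only member.
 */
struct example_stats_tlv_new {
	DECLARE_FLEX_ARRAY(u32, data);
};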
Link: https://github.com/KSPP/linux/issues/21 Link: https://github.com/KSPP/linux/issues/266 Link: https://gcc.gnu.org/pipermail/gcc-patches/2022-October/602902.html [1] Signed-off-by: Gustavo A. R. Silva Reviewed-by: Simon Horman Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/ZAe5L5DtmsQxzqRH@work --- .../wireless/ath/ath11k/debugfs_htt_stats.h | 73 +++++++++++-------- 1 file changed, 43 insertions(+), 30 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/debugfs_htt_stats.h b/drivers/net/wireless/ath/ath11k/debugfs_htt_stats.h index 2b97cbbd28cb7..0bbd58a380de6 100644 --- a/drivers/net/wireless/ath/ath11k/debugfs_htt_stats.h +++ b/drivers/net/wireless/ath/ath11k/debugfs_htt_stats.h @@ -143,7 +143,8 @@ enum htt_tx_pdev_underrun_enum { /* Bytes stored in little endian order */ /* Length should be multiple of DWORD */ struct htt_stats_string_tlv { - u32 data[0]; /* Can be variable length */ + /* Can be variable length */ + DECLARE_FLEX_ARRAY(u32, data); } __packed; #define HTT_STATS_MAC_ID GENMASK(7, 0) @@ -205,27 +206,32 @@ struct htt_tx_pdev_stats_cmn_tlv { /* NOTE: Variable length TLV, use length spec to infer array size */ struct htt_tx_pdev_stats_urrn_tlv_v { - u32 urrn_stats[0]; /* HTT_TX_PDEV_MAX_URRN_STATS */ + /* HTT_TX_PDEV_MAX_URRN_STATS */ + DECLARE_FLEX_ARRAY(u32, urrn_stats); }; /* NOTE: Variable length TLV, use length spec to infer array size */ struct htt_tx_pdev_stats_flush_tlv_v { - u32 flush_errs[0]; /* HTT_TX_PDEV_MAX_FLUSH_REASON_STATS */ + /* HTT_TX_PDEV_MAX_FLUSH_REASON_STATS */ + DECLARE_FLEX_ARRAY(u32, flush_errs); }; /* NOTE: Variable length TLV, use length spec to infer array size */ struct htt_tx_pdev_stats_sifs_tlv_v { - u32 sifs_status[0]; /* HTT_TX_PDEV_MAX_SIFS_BURST_STATS */ + /* HTT_TX_PDEV_MAX_SIFS_BURST_STATS */ + DECLARE_FLEX_ARRAY(u32, sifs_status); }; /* NOTE: Variable length TLV, use length spec to infer array size */ struct htt_tx_pdev_stats_phy_err_tlv_v { - u32 phy_errs[0]; /* HTT_TX_PDEV_MAX_PHY_ERR_STATS */ + /* HTT_TX_PDEV_MAX_PHY_ERR_STATS */ + DECLARE_FLEX_ARRAY(u32, phy_errs); }; /* NOTE: Variable length TLV, use length spec to infer array size */ struct htt_tx_pdev_stats_sifs_hist_tlv_v { - u32 sifs_hist_status[0]; /* HTT_TX_PDEV_SIFS_BURST_HIST_STATS */ + /* HTT_TX_PDEV_SIFS_BURST_HIST_STATS */ + DECLARE_FLEX_ARRAY(u32, sifs_hist_status); }; struct htt_tx_pdev_stats_tx_ppdu_stats_tlv_v { @@ -590,20 +596,20 @@ struct htt_tx_hwq_difs_latency_stats_tlv_v { /* NOTE: Variable length TLV, use length spec to infer array size */ struct htt_tx_hwq_cmd_result_stats_tlv_v { - /* Histogram of sched cmd result */ - u32 cmd_result[0]; /* HTT_TX_HWQ_MAX_CMD_RESULT_STATS */ + /* Histogram of sched cmd result, HTT_TX_HWQ_MAX_CMD_RESULT_STATS */ + DECLARE_FLEX_ARRAY(u32, cmd_result); }; /* NOTE: Variable length TLV, use length spec to infer array size */ struct htt_tx_hwq_cmd_stall_stats_tlv_v { - /* Histogram of various pause conitions */ - u32 cmd_stall_status[0]; /* HTT_TX_HWQ_MAX_CMD_STALL_STATS */ + /* Histogram of various pause conitions, HTT_TX_HWQ_MAX_CMD_STALL_STATS */ + DECLARE_FLEX_ARRAY(u32, cmd_stall_status); }; /* NOTE: Variable length TLV, use length spec to infer array size */ struct htt_tx_hwq_fes_result_stats_tlv_v { - /* Histogram of number of user fes result */ - u32 fes_result[0]; /* HTT_TX_HWQ_MAX_FES_RESULT_STATS */ + /* Histogram of number of user fes result, HTT_TX_HWQ_MAX_FES_RESULT_STATS */ + DECLARE_FLEX_ARRAY(u32, fes_result); }; /* NOTE: Variable length TLV, use length spec to infer array size @@ -635,8 +641,8 
@@ struct htt_tx_hwq_tried_mpdu_cnt_hist_tlv_v { * #define WAL_TXOP_USED_HISTOGRAM_INTERVAL 1000 ( 1 ms ) */ struct htt_tx_hwq_txop_used_cnt_hist_tlv_v { - /* Histogram of txop used cnt */ - u32 txop_used_cnt_hist[0]; /* HTT_TX_HWQ_TXOP_USED_CNT_HIST */ + /* Histogram of txop used cnt, HTT_TX_HWQ_TXOP_USED_CNT_HIST */ + DECLARE_FLEX_ARRAY(u32, txop_used_cnt_hist); }; /* == TX SELFGEN STATS == */ @@ -804,17 +810,20 @@ struct htt_tx_pdev_mpdu_stats_tlv { /* == TX SCHED STATS == */ /* NOTE: Variable length TLV, use length spec to infer array size */ struct htt_sched_txq_cmd_posted_tlv_v { - u32 sched_cmd_posted[0]; /* HTT_TX_PDEV_SCHED_TX_MODE_MAX */ + /* HTT_TX_PDEV_SCHED_TX_MODE_MAX */ + DECLARE_FLEX_ARRAY(u32, sched_cmd_posted); }; /* NOTE: Variable length TLV, use length spec to infer array size */ struct htt_sched_txq_cmd_reaped_tlv_v { - u32 sched_cmd_reaped[0]; /* HTT_TX_PDEV_SCHED_TX_MODE_MAX */ + /* HTT_TX_PDEV_SCHED_TX_MODE_MAX */ + DECLARE_FLEX_ARRAY(u32, sched_cmd_reaped); }; /* NOTE: Variable length TLV, use length spec to infer array size */ struct htt_sched_txq_sched_order_su_tlv_v { - u32 sched_order_su[0]; /* HTT_TX_PDEV_NUM_SCHED_ORDER_LOG */ + /* HTT_TX_PDEV_NUM_SCHED_ORDER_LOG */ + DECLARE_FLEX_ARRAY(u32, sched_order_su); }; enum htt_sched_txq_sched_ineligibility_tlv_enum { @@ -842,7 +851,7 @@ enum htt_sched_txq_sched_ineligibility_tlv_enum { /* NOTE: Variable length TLV, use length spec to infer array size */ struct htt_sched_txq_sched_ineligibility_tlv_v { /* indexed by htt_sched_txq_sched_ineligibility_tlv_enum */ - u32 sched_ineligibility[0]; + DECLARE_FLEX_ARRAY(u32, sched_ineligibility); }; #define HTT_TX_PDEV_STATS_SCHED_PER_TXQ_MAC_ID GENMASK(7, 0) @@ -888,18 +897,20 @@ struct htt_stats_tx_sched_cmn_tlv { /* NOTE: Variable length TLV, use length spec to infer array size */ struct htt_tx_tqm_gen_mpdu_stats_tlv_v { - u32 gen_mpdu_end_reason[0]; /* HTT_TX_TQM_MAX_GEN_MPDU_END_REASON */ + /* HTT_TX_TQM_MAX_GEN_MPDU_END_REASON */ + DECLARE_FLEX_ARRAY(u32, gen_mpdu_end_reason); }; /* NOTE: Variable length TLV, use length spec to infer array size */ struct htt_tx_tqm_list_mpdu_stats_tlv_v { - u32 list_mpdu_end_reason[0]; /* HTT_TX_TQM_MAX_LIST_MPDU_END_REASON */ + /* HTT_TX_TQM_MAX_LIST_MPDU_END_REASON */ + DECLARE_FLEX_ARRAY(u32, list_mpdu_end_reason); }; /* NOTE: Variable length TLV, use length spec to infer array size */ struct htt_tx_tqm_list_mpdu_cnt_tlv_v { - u32 list_mpdu_cnt_hist[0]; - /* HTT_TX_TQM_MAX_LIST_MPDU_CNT_HISTOGRAM_BINS */ + /* HTT_TX_TQM_MAX_LIST_MPDU_CNT_HISTOGRAM_BINS */ + DECLARE_FLEX_ARRAY(u32, list_mpdu_cnt_hist); }; struct htt_tx_tqm_pdev_stats_tlv_v { @@ -1098,7 +1109,7 @@ struct htt_tx_de_compl_stats_tlv { * ENTRIES_PER_BIN_COUNT) */ struct htt_tx_de_fw2wbm_ring_full_hist_tlv { - u32 fw2wbm_ring_full_hist[0]; + DECLARE_FLEX_ARRAY(u32, fw2wbm_ring_full_hist); }; struct htt_tx_de_cmn_stats_tlv { @@ -1151,7 +1162,7 @@ struct htt_ring_if_cmn_tlv { /* NOTE: Variable length TLV, use length spec to infer array size */ struct htt_sfm_client_user_tlv_v { /* Number of DWORDS used per user and per client */ - u32 dwords_used_by_user_n[0]; + DECLARE_FLEX_ARRAY(u32, dwords_used_by_user_n); }; struct htt_sfm_client_tlv { @@ -1436,12 +1447,14 @@ struct htt_rx_soc_fw_stats_tlv { /* NOTE: Variable length TLV, use length spec to infer array size */ struct htt_rx_soc_fw_refill_ring_empty_tlv_v { - u32 refill_ring_empty_cnt[0]; /* HTT_RX_STATS_REFILL_MAX_RING */ + /* HTT_RX_STATS_REFILL_MAX_RING */ + DECLARE_FLEX_ARRAY(u32, refill_ring_empty_cnt); }; /* NOTE: 
Variable length TLV, use length spec to infer array size */ struct htt_rx_soc_fw_refill_ring_num_refill_tlv_v { - u32 refill_ring_num_refill[0]; /* HTT_RX_STATS_REFILL_MAX_RING */ + /* HTT_RX_STATS_REFILL_MAX_RING */ + DECLARE_FLEX_ARRAY(u32, refill_ring_num_refill); }; /* RXDMA error code from WBM released packets */ @@ -1473,7 +1486,7 @@ enum htt_rx_rxdma_error_code_enum { /* NOTE: Variable length TLV, use length spec to infer array size */ struct htt_rx_soc_fw_refill_ring_num_rxdma_err_tlv_v { - u32 rxdma_err[0]; /* HTT_RX_RXDMA_MAX_ERR_CODE */ + DECLARE_FLEX_ARRAY(u32, rxdma_err); /* HTT_RX_RXDMA_MAX_ERR_CODE */ }; /* REO error code from WBM released packets */ @@ -1505,7 +1518,7 @@ enum htt_rx_reo_error_code_enum { /* NOTE: Variable length TLV, use length spec to infer array size */ struct htt_rx_soc_fw_refill_ring_num_reo_err_tlv_v { - u32 reo_err[0]; /* HTT_RX_REO_MAX_ERR_CODE */ + DECLARE_FLEX_ARRAY(u32, reo_err); /* HTT_RX_REO_MAX_ERR_CODE */ }; /* == RX PDEV STATS == */ @@ -1622,13 +1635,13 @@ struct htt_rx_pdev_fw_stats_phy_err_tlv { /* NOTE: Variable length TLV, use length spec to infer array size */ struct htt_rx_pdev_fw_ring_mpdu_err_tlv_v { /* Num error MPDU for each RxDMA error type */ - u32 fw_ring_mpdu_err[0]; /* HTT_RX_STATS_RXDMA_MAX_ERR */ + DECLARE_FLEX_ARRAY(u32, fw_ring_mpdu_err); /* HTT_RX_STATS_RXDMA_MAX_ERR */ }; /* NOTE: Variable length TLV, use length spec to infer array size */ struct htt_rx_pdev_fw_mpdu_drop_tlv_v { /* Num MPDU dropped */ - u32 fw_mpdu_drop[0]; /* HTT_RX_STATS_FW_DROP_REASON_MAX */ + DECLARE_FLEX_ARRAY(u32, fw_mpdu_drop); /* HTT_RX_STATS_FW_DROP_REASON_MAX */ }; #define HTT_PDEV_CCA_STATS_TX_FRAME_INFO_PRESENT (0x1) -- GitLab From 5a78ac33e3cb8822da64dd1af196e83664b332b0 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Thu, 9 Mar 2023 15:23:08 +0530 Subject: [PATCH 0451/2078] wifi: ath11k: fix deinitialization of firmware resources Currently, in ath11k_ahb_fw_resources_init(), iommu domain mapping is done only for the chipsets having fixed firmware memory. Also, for such chipsets, mapping is done only if it does not have TrustZone support. During deinitialization, only if TrustZone support is not there, iommu is unmapped back. However, for non fixed firmware memory chipsets, TrustZone support is not there and this makes the condition check to true and it tries to unmap the memory which was not mapped during initialization. This leads to the following trace - [ 83.198790] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008 [ 83.259537] Modules linked in: ath11k_ahb ath11k qmi_helpers .. snip .. [ 83.280286] pstate: 20000005 (nzCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 83.287228] pc : __iommu_unmap+0x30/0x140 [ 83.293907] lr : iommu_unmap+0x5c/0xa4 [ 83.298072] sp : ffff80000b3abad0 .. snip .. 
[ 83.369175] Call trace: [ 83.376282] __iommu_unmap+0x30/0x140 [ 83.378541] iommu_unmap+0x5c/0xa4 [ 83.382360] ath11k_ahb_fw_resource_deinit.part.12+0x2c/0xac [ath11k_ahb] [ 83.385666] ath11k_ahb_free_resources+0x140/0x17c [ath11k_ahb] [ 83.392521] ath11k_ahb_shutdown+0x34/0x40 [ath11k_ahb] [ 83.398248] platform_shutdown+0x20/0x2c [ 83.403455] device_shutdown+0x16c/0x1c4 [ 83.407621] kernel_restart_prepare+0x34/0x3c [ 83.411529] kernel_restart+0x14/0x74 [ 83.415781] __do_sys_reboot+0x1c4/0x22c [ 83.419427] __arm64_sys_reboot+0x1c/0x24 [ 83.423420] invoke_syscall+0x44/0xfc [ 83.427326] el0_svc_common.constprop.3+0xac/0xe8 [ 83.430974] do_el0_svc+0xa0/0xa8 [ 83.435659] el0_svc+0x1c/0x44 [ 83.438957] el0t_64_sync_handler+0x60/0x144 [ 83.441910] el0t_64_sync+0x15c/0x160 [ 83.446343] Code: aa0103f4 f9400001 f90027a1 d2800001 (f94006a0) [ 83.449903] ---[ end trace 0000000000000000 ]--- This can be reproduced by probing an AHB chipset which is not having a fixed memory region. During reboot (or rmmod) trace can be seen. Fix this issue by adding a condition check on firmware fixed memory hw_param as done in the counter initialization function. Tested-on: IPQ8074 hw2.0 AHB WLAN.HK.2.7.0.1-01744-QCAHKSWPL_SILICONZ-1 Fixes: f9eec4947add ("ath11k: Add support for targets without trustzone") Signed-off-by: Aditya Kumar Singh Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230309095308.24937-1-quic_adisi@quicinc.com --- drivers/net/wireless/ath/ath11k/ahb.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/wireless/ath/ath11k/ahb.c b/drivers/net/wireless/ath/ath11k/ahb.c index b549576d0b513..5cbba9a8b6ba9 100644 --- a/drivers/net/wireless/ath/ath11k/ahb.c +++ b/drivers/net/wireless/ath/ath11k/ahb.c @@ -1078,6 +1078,12 @@ static int ath11k_ahb_fw_resource_deinit(struct ath11k_base *ab) struct iommu_domain *iommu; size_t unmapped_size; + /* Chipsets not requiring MSA would have not initialized + * MSA resources, return success in such cases. + */ + if (!ab->hw_params.fixed_fw_mem) + return 0; + if (ab_ahb->fw.use_tz) return 0; -- GitLab From 0936998393c7dffd68ba5d73eabdabf8dddbbc41 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 14 Mar 2023 19:49:13 +0200 Subject: [PATCH 0452/2078] wifi: iwlwifi: mvm: avoid sta lookup in queue alloc In FW restart scenarios, we allocate the queues from the iwl_mvm_realloc_queues_after_restart() function, but that is called before we insert the station ID into our map (mvm->fw_id_to_mac_id). However, in all cases where we're actually allocating a queue for a "real" (not bcast, aux, ...) station we have the sta pointer already, so just pass it along to use it instead of looking it up. This fixes an issue where after restart we only allocated a queue of size 16 (due to the ordering issue described above), and thus never got good throughput again since no aggregates could be formed on transmit. 
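Reduced to a sketch with hypothetical helper names (the real functions appear in the diff that follows), the shape of the fix is to stop re-deriving the station from the fw_id_to_mac_id map - which is still unpopulated while queues are re-allocated after a firmware restart - and instead use the pointer the caller already holds:

/* Before: the queue size helper looks the station up by ID; during
 * restart the map entry is still NULL, so the small default size is
 * used and aggregation-sized queues are never allocated.
 */
size = example_queue_size_by_id(mvm, sta_id);

/* After: the caller passes the station pointer it already has, so the
 * HE/EHT capabilities can be inspected and a large enough queue sized.
 */
size = example_queue_size_by_sta(sta);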
Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230314194113.4d70868003e8.I3476fee5c12f5b1af2be5e2f38a9df7d66d02b62@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 50 +++++++++++--------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 69634fb82a9bf..46af2b422849f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -766,7 +766,25 @@ static int iwl_mvm_find_free_queue(struct iwl_mvm *mvm, u8 sta_id, return -ENOSPC; } +static int iwl_mvm_get_queue_size(struct ieee80211_sta *sta) +{ + /* this queue isn't used for traffic (cab_queue) */ + if (!sta) + return IWL_MGMT_QUEUE_SIZE; + + /* support for 1k ba size */ + if (sta->deflink.eht_cap.has_eht) + return IWL_DEFAULT_QUEUE_SIZE_EHT; + + /* support for 256 ba size */ + if (sta->deflink.he_cap.has_he) + return IWL_DEFAULT_QUEUE_SIZE_HE; + + return IWL_DEFAULT_QUEUE_SIZE; +} + static int iwl_mvm_tvqm_enable_txq(struct iwl_mvm *mvm, + struct ieee80211_sta *sta, u8 sta_id, u8 tid, unsigned int timeout) { int queue, size; @@ -776,22 +794,7 @@ static int iwl_mvm_tvqm_enable_txq(struct iwl_mvm *mvm, size = max_t(u32, IWL_MGMT_QUEUE_SIZE, mvm->trans->cfg->min_txq_size); } else { - struct ieee80211_sta *sta; - - rcu_read_lock(); - sta = rcu_dereference(mvm->fw_id_to_mac_id[sta_id]); - - /* this queue isn't used for traffic (cab_queue) */ - if (IS_ERR_OR_NULL(sta)) { - size = IWL_MGMT_QUEUE_SIZE; - } else if (sta->deflink.he_cap.has_he) { - /* support for 256 ba size */ - size = IWL_DEFAULT_QUEUE_SIZE_HE; - } else { - size = IWL_DEFAULT_QUEUE_SIZE; - } - - rcu_read_unlock(); + size = iwl_mvm_get_queue_size(sta); } /* take the min with bc tbl entries allowed */ @@ -836,7 +839,8 @@ static int iwl_mvm_sta_alloc_queue_tvqm(struct iwl_mvm *mvm, IWL_DEBUG_TX_QUEUES(mvm, "Allocating queue for sta %d on tid %d\n", mvmsta->sta_id, tid); - queue = iwl_mvm_tvqm_enable_txq(mvm, mvmsta->sta_id, tid, wdg_timeout); + queue = iwl_mvm_tvqm_enable_txq(mvm, sta, mvmsta->sta_id, + tid, wdg_timeout); if (queue < 0) return queue; @@ -1537,7 +1541,8 @@ static void iwl_mvm_realloc_queues_after_restart(struct iwl_mvm *mvm, IWL_DEBUG_TX_QUEUES(mvm, "Re-mapping sta %d tid %d\n", mvm_sta->sta_id, i); - txq_id = iwl_mvm_tvqm_enable_txq(mvm, mvm_sta->sta_id, + txq_id = iwl_mvm_tvqm_enable_txq(mvm, sta, + mvm_sta->sta_id, i, wdg); /* * on failures, just set it to IWL_MVM_INVALID_QUEUE @@ -2049,7 +2054,7 @@ static int iwl_mvm_enable_aux_snif_queue_tvqm(struct iwl_mvm *mvm, u8 sta_id) WARN_ON(!iwl_mvm_has_new_tx_api(mvm)); - return iwl_mvm_tvqm_enable_txq(mvm, sta_id, IWL_MAX_TID_COUNT, + return iwl_mvm_tvqm_enable_txq(mvm, NULL, sta_id, IWL_MAX_TID_COUNT, wdg_timeout); } @@ -2233,7 +2238,7 @@ int iwl_mvm_send_add_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) * to firmware so enable queue here - after the station was added */ if (iwl_mvm_has_new_tx_api(mvm)) { - queue = iwl_mvm_tvqm_enable_txq(mvm, bsta->sta_id, + queue = iwl_mvm_tvqm_enable_txq(mvm, NULL, bsta->sta_id, IWL_MAX_TID_COUNT, wdg_timeout); if (queue < 0) { @@ -2427,9 +2432,8 @@ int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) * tfd_queue_mask. 
*/ if (iwl_mvm_has_new_tx_api(mvm)) { - int queue = iwl_mvm_tvqm_enable_txq(mvm, msta->sta_id, - 0, - timeout); + int queue = iwl_mvm_tvqm_enable_txq(mvm, NULL, msta->sta_id, + 0, timeout); if (queue < 0) { ret = queue; goto err; -- GitLab From 51fa8c026e072dac49ca638d66a64a66b29e916f Mon Sep 17 00:00:00 2001 From: Mukesh Sisodiya Date: Tue, 14 Mar 2023 19:49:14 +0200 Subject: [PATCH 0453/2078] wifi: iwlwifi: yoyo: Add new tlv for dump file name extension Add tlv in dump file for dump file name extension. Signed-off-by: Mukesh Sisodiya Signed-off-by: Greenman, Gregory Link: https://lore.kernel.org/r/20230314194113.ffc28212994e.Ie5f10709548497061f95c1634d942dd2facf72ec@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 32 ++++++++++++++++++- .../wireless/intel/iwlwifi/fw/error-dump.h | 17 +++++++++- .../net/wireless/intel/iwlwifi/iwl-trans.h | 4 +++ 3 files changed, 51 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index abf49022edbe4..ca97f2fcb6932 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -2320,6 +2320,34 @@ static u32 iwl_dump_ini_info(struct iwl_fw_runtime *fwrt, return entry->size; } +static u32 iwl_dump_ini_file_name_info(struct iwl_fw_runtime *fwrt, + struct list_head *list) +{ + struct iwl_fw_ini_dump_entry *entry; + struct iwl_dump_file_name_info *tlv; + u32 len = strnlen(fwrt->trans->dbg.dump_file_name_ext, + IWL_FW_INI_MAX_NAME); + + if (!fwrt->trans->dbg.dump_file_name_ext_valid) + return 0; + + entry = vzalloc(sizeof(*entry) + sizeof(*tlv) + len); + if (!entry) + return 0; + + entry->size = sizeof(*tlv) + len; + + tlv = (void *)entry->data; + tlv->type = cpu_to_le32(IWL_INI_DUMP_NAME_TYPE); + tlv->len = cpu_to_le32(len); + memcpy(tlv->data, fwrt->trans->dbg.dump_file_name_ext, len); + + /* add the dump file name extension tlv to the list */ + list_add_tail(&entry->list, list); + + return entry->size; +} + static const struct iwl_dump_ini_mem_ops iwl_dump_ini_region_ops[] = { [IWL_FW_INI_REGION_INVALID] = {}, [IWL_FW_INI_REGION_INTERNAL_BUFFER] = { @@ -2495,8 +2523,10 @@ static u32 iwl_dump_ini_trigger(struct iwl_fw_runtime *fwrt, size += iwl_dump_ini_mem(fwrt, list, ®_data, &iwl_dump_ini_region_ops[IWL_FW_INI_REGION_DRAM_IMR]); - if (size) + if (size) { + size += iwl_dump_ini_file_name_info(fwrt, list); size += iwl_dump_ini_info(fwrt, trigger, list); + } return size; } diff --git a/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h b/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h index c62576e442bdf..f5e08988dc7bf 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* - * Copyright (C) 2014, 2018-2021 Intel Corporation + * Copyright (C) 2014, 2018-2022 Intel Corporation * Copyright (C) 2014-2015 Intel Mobile Communications GmbH * Copyright (C) 2016-2017 Intel Deutschland GmbH */ @@ -75,6 +75,18 @@ struct iwl_fw_error_dump_data { __u8 data[]; } __packed; +/** + * struct iwl_dump_file_name_info - data for dump file name addition + * @type: region type with reserved bits + * @len: the length of file name string to be added to dump file + * @data: the string need to be added to dump file + */ +struct iwl_dump_file_name_info { + __le32 type; + __le32 len; + __u8 data[]; +} __packed; + /** * struct iwl_fw_error_dump_file - the layout of the header of the 
file * @barker: must be %IWL_FW_ERROR_DUMP_BARKER @@ -231,6 +243,9 @@ struct iwl_fw_error_dump_mem { /* Use bit 31 as dump info type to avoid colliding with region types */ #define IWL_INI_DUMP_INFO_TYPE BIT(31) +/* Use bit 31 and bit 24 as dump name type to avoid colliding with region types */ +#define IWL_INI_DUMP_NAME_TYPE (BIT(31) | BIT(24)) + /** * struct iwl_fw_error_dump_data - data for one type * @type: &enum iwl_fw_ini_region_type diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index 9aced3e44bc2d..dd277a4fa8dd3 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -775,6 +775,8 @@ struct iwl_imr_data { * @periodic_trig_list: periodic triggers list * @domains_bitmap: bitmap of active domains other than &IWL_FW_INI_DOMAIN_ALWAYS_ON * @ucode_preset: preset based on ucode + * @dump_file_name_ext: dump file name extension + * @dump_file_name_ext_valid: dump file name extension if valid or not */ struct iwl_trans_debug { u8 n_dest_reg; @@ -813,6 +815,8 @@ struct iwl_trans_debug { bool restart_required; u32 last_tp_resetfw; struct iwl_imr_data imr_data; + u8 dump_file_name_ext[IWL_FW_INI_MAX_NAME]; + bool dump_file_name_ext_valid; }; struct iwl_dma_ptr { -- GitLab From 834f920ef34b57a51fa892fae6de86830316c353 Mon Sep 17 00:00:00 2001 From: Mukesh Sisodiya Date: Tue, 14 Mar 2023 19:49:15 +0200 Subject: [PATCH 0454/2078] wifi: iwlwifi: yoyo: Add driver defined dump file name Add driver defined dump file name extension for beacon loss and FW Assert case. Signed-off-by: Mukesh Sisodiya Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230314194113.2a2ee92995e9.I38fff588e32276796cd757309fc811241f827c7a@changeid Signed-off-by: Johannes Berg --- .../net/wireless/intel/iwlwifi/cfg/22000.c | 2 +- drivers/net/wireless/intel/iwlwifi/fw/dump.c | 58 +++++++++++++++++- .../net/wireless/intel/iwlwifi/fw/runtime.h | 4 ++ drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 5 +- .../net/wireless/intel/iwlwifi/mvm/mac-ctxt.c | 61 ++++++++++++------- 5 files changed, 101 insertions(+), 29 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c index 05720352e49f1..43fb5cf85f05a 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c @@ -281,7 +281,7 @@ static const struct iwl_ht_params iwl_gl_a_ht_params = { .trans.gen2 = true, \ .nvm_type = IWL_NVM_EXT, \ .dbgc_supported = true, \ - .min_umac_error_event_table = 0x400000, \ + .min_umac_error_event_table = 0xD0000, \ .d3_debug_data_base_addr = 0x401000, \ .d3_debug_data_length = 60 * 1024, \ .mon_smem_regs = { \ diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dump.c b/drivers/net/wireless/intel/iwlwifi/fw/dump.c index 792f7fee1840f..59ed321bcc272 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dump.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dump.c @@ -14,6 +14,13 @@ #include "iwl-csr.h" #include "pnvm.h" +#define FW_ASSERT_LMAC_FATAL 0x70 +#define FW_ASSERT_LMAC2_FATAL 0x72 +#define FW_ASSERT_UMAC_FATAL 0x71 +#define UMAC_RT_NMI_LMAC2_FATAL 0x72 +#define RT_NMI_INTERRUPT_OTHER_LMAC_FATAL 0x73 +#define FW_ASSERT_NMI_UNKNOWN 0x84 + /* * Note: This structure is read from the device with IO accesses, * and the reading already does the endian conversion. 
As it is @@ -96,6 +103,17 @@ struct iwl_umac_error_event_table { #define ERROR_START_OFFSET (1 * sizeof(u32)) #define ERROR_ELEM_SIZE (7 * sizeof(u32)) +static bool iwl_fwrt_if_errorid_other_cpu(u32 err_id) +{ + err_id &= 0xFF; + + if ((err_id >= FW_ASSERT_LMAC_FATAL && + err_id <= RT_NMI_INTERRUPT_OTHER_LMAC_FATAL) || + err_id == FW_ASSERT_NMI_UNKNOWN) + return true; + return false; +} + static void iwl_fwrt_dump_umac_error_log(struct iwl_fw_runtime *fwrt) { struct iwl_trans *trans = fwrt->trans; @@ -113,6 +131,13 @@ static void iwl_fwrt_dump_umac_error_log(struct iwl_fw_runtime *fwrt) if (table.valid) fwrt->dump.umac_err_id = table.error_id; + if (!iwl_fwrt_if_errorid_other_cpu(fwrt->dump.umac_err_id) && + !fwrt->trans->dbg.dump_file_name_ext_valid) { + fwrt->trans->dbg.dump_file_name_ext_valid = true; + snprintf(fwrt->trans->dbg.dump_file_name_ext, IWL_FW_INI_MAX_NAME, + "0x%x", fwrt->dump.umac_err_id); + } + if (ERROR_START_OFFSET <= table.valid * ERROR_ELEM_SIZE) { IWL_ERR(trans, "Start IWL Error Log Dump:\n"); IWL_ERR(trans, "Transport status: 0x%08lX, valid: %d\n", @@ -189,6 +214,13 @@ static void iwl_fwrt_dump_lmac_error_log(struct iwl_fw_runtime *fwrt, u8 lmac_nu if (table.valid) fwrt->dump.lmac_err_id[lmac_num] = table.error_id; + if (!iwl_fwrt_if_errorid_other_cpu(fwrt->dump.lmac_err_id[lmac_num]) && + !fwrt->trans->dbg.dump_file_name_ext_valid) { + fwrt->trans->dbg.dump_file_name_ext_valid = true; + snprintf(fwrt->trans->dbg.dump_file_name_ext, IWL_FW_INI_MAX_NAME, + "0x%x", fwrt->dump.lmac_err_id[lmac_num]); + } + if (ERROR_START_OFFSET <= table.valid * ERROR_ELEM_SIZE) { IWL_ERR(trans, "Start IWL Error Log Dump:\n"); IWL_ERR(trans, "Transport status: 0x%08lX, valid: %d\n", @@ -274,6 +306,16 @@ static void iwl_fwrt_dump_tcm_error_log(struct iwl_fw_runtime *fwrt, int idx) iwl_trans_read_mem_bytes(trans, base, &table, sizeof(table)); + if (table.valid) + fwrt->dump.tcm_err_id[idx] = table.error_id; + + if (!iwl_fwrt_if_errorid_other_cpu(fwrt->dump.tcm_err_id[idx]) && + !fwrt->trans->dbg.dump_file_name_ext_valid) { + fwrt->trans->dbg.dump_file_name_ext_valid = true; + snprintf(fwrt->trans->dbg.dump_file_name_ext, IWL_FW_INI_MAX_NAME, + "0x%x", fwrt->dump.tcm_err_id[idx]); + } + IWL_ERR(fwrt, "TCM%d status:\n", idx + 1); IWL_ERR(fwrt, "0x%08X | error ID\n", table.error_id); IWL_ERR(fwrt, "0x%08X | tcm branchlink2\n", table.blink2); @@ -337,6 +379,16 @@ static void iwl_fwrt_dump_rcm_error_log(struct iwl_fw_runtime *fwrt, int idx) iwl_trans_read_mem_bytes(trans, base, &table, sizeof(table)); + if (table.valid) + fwrt->dump.rcm_err_id[idx] = table.error_id; + + if (!iwl_fwrt_if_errorid_other_cpu(fwrt->dump.rcm_err_id[idx]) && + !fwrt->trans->dbg.dump_file_name_ext_valid) { + fwrt->trans->dbg.dump_file_name_ext_valid = true; + snprintf(fwrt->trans->dbg.dump_file_name_ext, IWL_FW_INI_MAX_NAME, + "0x%x", fwrt->dump.rcm_err_id[idx]); + } + IWL_ERR(fwrt, "RCM%d status:\n", idx + 1); IWL_ERR(fwrt, "0x%08X | error ID\n", table.error_id); IWL_ERR(fwrt, "0x%08X | rcm branchlink2\n", table.blink2); @@ -444,8 +496,10 @@ void iwl_fwrt_dump_error_logs(struct iwl_fw_runtime *fwrt) iwl_fwrt_dump_umac_error_log(fwrt); iwl_fwrt_dump_tcm_error_log(fwrt, 0); iwl_fwrt_dump_rcm_error_log(fwrt, 0); - iwl_fwrt_dump_tcm_error_log(fwrt, 1); - iwl_fwrt_dump_rcm_error_log(fwrt, 1); + if (fwrt->trans->dbg.tcm_error_event_table[1]) + iwl_fwrt_dump_tcm_error_log(fwrt, 1); + if (fwrt->trans->dbg.rcm_error_event_table[1]) + iwl_fwrt_dump_rcm_error_log(fwrt, 1); iwl_fwrt_dump_iml_error_log(fwrt); 
iwl_fwrt_dump_fseq_regs(fwrt); diff --git a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h index d3cb1ae68a96c..a59cf4d9567c7 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h @@ -24,6 +24,8 @@ struct iwl_fw_runtime_ops { }; #define MAX_NUM_LMAC 2 +#define MAX_NUM_TCM 2 +#define MAX_NUM_RCM 2 struct iwl_fwrt_shared_mem_cfg { int num_lmacs; int num_txfifo_entries; @@ -129,6 +131,8 @@ struct iwl_fw_runtime { unsigned long non_collect_ts_start[IWL_FW_INI_TIME_POINT_NUM]; u32 *d3_debug_data; u32 lmac_err_id[MAX_NUM_LMAC]; + u32 tcm_err_id[MAX_NUM_TCM]; + u32 rcm_err_id[MAX_NUM_RCM]; u32 umac_err_id; struct iwl_txf_iter_data txf_iter_data; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 45981e22b2dbd..3e9e9f13506b6 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -122,8 +122,6 @@ static bool iwl_alive_fn(struct iwl_notif_wait_data *notif_wait, u32 version = iwl_fw_lookup_notif_ver(mvm->fw, LEGACY_GROUP, UCODE_ALIVE_NTFY, 0); u32 i; - struct iwl_trans *trans = mvm->trans; - enum iwl_device_family device_family = trans->trans_cfg->device_family; if (version == 6) { @@ -233,8 +231,7 @@ static bool iwl_alive_fn(struct iwl_notif_wait_data *notif_wait, if (umac_error_table) { if (umac_error_table >= - mvm->trans->cfg->min_umac_error_event_table || - device_family >= IWL_DEVICE_FAMILY_BZ) { + mvm->trans->cfg->min_umac_error_event_table) { iwl_fw_umac_set_alive_err_table(mvm->trans, umac_error_table); } else { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index 114c96ba39eeb..422550e31bc60 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -430,47 +430,55 @@ static void iwl_mvm_mac_ctxt_set_ht_flags(struct iwl_mvm *mvm, } } -static void iwl_mvm_mac_ctxt_cmd_common(struct iwl_mvm *mvm, - struct ieee80211_vif *vif, - struct iwl_mac_ctx_cmd *cmd, - const u8 *bssid_override, - u32 action) +static int iwl_mvm_get_mac_type(struct ieee80211_vif *vif) { - struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); - struct ieee80211_chanctx_conf *chanctx; - bool ht_enabled = !!(vif->bss_conf.ht_operation_mode & - IEEE80211_HT_OP_MODE_PROTECTION); - u8 cck_ack_rates, ofdm_ack_rates; - const u8 *bssid = bssid_override ?: vif->bss_conf.bssid; - int i; - - cmd->id_and_color = cpu_to_le32(FW_CMD_ID_AND_COLOR(mvmvif->id, - mvmvif->color)); - cmd->action = cpu_to_le32(action); + u32 mac_type = FW_MAC_TYPE_BSS_STA; switch (vif->type) { case NL80211_IFTYPE_STATION: if (vif->p2p) - cmd->mac_type = cpu_to_le32(FW_MAC_TYPE_P2P_STA); + mac_type = FW_MAC_TYPE_P2P_STA; else - cmd->mac_type = cpu_to_le32(FW_MAC_TYPE_BSS_STA); + mac_type = FW_MAC_TYPE_BSS_STA; break; case NL80211_IFTYPE_AP: - cmd->mac_type = cpu_to_le32(FW_MAC_TYPE_GO); + mac_type = FW_MAC_TYPE_GO; break; case NL80211_IFTYPE_MONITOR: - cmd->mac_type = cpu_to_le32(FW_MAC_TYPE_LISTENER); + mac_type = FW_MAC_TYPE_LISTENER; break; case NL80211_IFTYPE_P2P_DEVICE: - cmd->mac_type = cpu_to_le32(FW_MAC_TYPE_P2P_DEVICE); + mac_type = FW_MAC_TYPE_P2P_DEVICE; break; case NL80211_IFTYPE_ADHOC: - cmd->mac_type = cpu_to_le32(FW_MAC_TYPE_IBSS); + mac_type = FW_MAC_TYPE_IBSS; break; default: WARN_ON_ONCE(1); } + return mac_type; +} + +static void iwl_mvm_mac_ctxt_cmd_common(struct iwl_mvm *mvm, + struct 
ieee80211_vif *vif, + struct iwl_mac_ctx_cmd *cmd, + const u8 *bssid_override, + u32 action) +{ + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + struct ieee80211_chanctx_conf *chanctx; + bool ht_enabled = !!(vif->bss_conf.ht_operation_mode & + IEEE80211_HT_OP_MODE_PROTECTION); + u8 cck_ack_rates, ofdm_ack_rates; + const u8 *bssid = bssid_override ?: vif->bss_conf.bssid; + int i; + + cmd->id_and_color = cpu_to_le32(FW_CMD_ID_AND_COLOR(mvmvif->id, + mvmvif->color)); + cmd->action = cpu_to_le32(action); + cmd->mac_type = cpu_to_le32(iwl_mvm_get_mac_type(vif)); + cmd->tsf_id = cpu_to_le32(mvmvif->tsf_id); memcpy(cmd->node_addr, vif->addr, ETH_ALEN); @@ -1428,6 +1436,7 @@ void iwl_mvm_rx_missed_beacons_notif(struct iwl_mvm *mvm, struct ieee80211_vif *vif; u32 id = le32_to_cpu(mb->mac_id); union iwl_dbg_tlv_tp_data tp_data = { .fw_pkt = pkt }; + u32 mac_type; IWL_DEBUG_INFO(mvm, "missed bcn mac_id=%u, consecutive=%u (%u, %u, %u)\n", @@ -1443,6 +1452,14 @@ void iwl_mvm_rx_missed_beacons_notif(struct iwl_mvm *mvm, if (!vif) goto out; + mac_type = iwl_mvm_get_mac_type(vif); + + IWL_DEBUG_INFO(mvm, "missed beacon mac_type=%u,\n", mac_type); + + mvm->trans->dbg.dump_file_name_ext_valid = true; + snprintf(mvm->trans->dbg.dump_file_name_ext, IWL_FW_INI_MAX_NAME, + "MacId_%d_MacType_%d", id, mac_type); + rx_missed_bcon = le32_to_cpu(mb->consec_missed_beacons); rx_missed_bcon_since_rx = le32_to_cpu(mb->consec_missed_beacons_since_last_rx); -- GitLab From 9c4f15cadcd74becbb7e05f7fccd27180e433b8c Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Tue, 14 Mar 2023 19:49:16 +0200 Subject: [PATCH 0455/2078] wifi: iwlwifi: mvm: Refactor STA_HE_CTXT_CMD sending flow Refactor STA_HE_CTXT_CMD sending flow: 1. As the new MLD API is introduced, there are some common fields in both the old and new APIs. The STA_HE_CTXT_CMD of the non-MLD API has common fields with the link and mac commands of the new MLD API. Put this common parts in functions so it can be used later by the new MLD API. 2. The HE capability which indicates whether the NIC is ack-enabled or not is the same for all bands. No need to take it from the specific band which is currently in use. Take it from the low band - this simplifies the code and doesn't require a phy_ctxt. 
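As a rough stand-alone sketch of point 2 above: when a capability bit is known to be identical on every band, it can be read from a fixed band (here 2.4 GHz), so the caller no longer needs an active channel context to find the current band. The structures and the ACK_EN bit value are invented stand-ins, not the real mac80211/iwlwifi definitions:

#include <stdbool.h>
#include <stdio.h>

#define HE_MAC_CAP2_ACK_EN 0x02	/* stand-in bit for "ack-enabled AGG" */

enum band { BAND_2GHZ, BAND_5GHZ, BAND_6GHZ, NUM_BANDS };

struct he_cap {
	bool has_he;
	unsigned char mac_cap_info[6];
};

struct own_caps {
	struct he_cap he[NUM_BANDS];	/* per-band HE capabilities */
};

static bool nic_ack_enabled(const struct own_caps *caps)
{
	/* the bit is the same on every band, so the low band is as good as any */
	const struct he_cap *own = &caps->he[BAND_2GHZ];

	return own->has_he &&
	       (own->mac_cap_info[2] & HE_MAC_CAP2_ACK_EN);
}

int main(void)
{
	struct own_caps caps = {
		.he[BAND_2GHZ] = {
			.has_he = true,
			.mac_cap_info = { 0, 0, HE_MAC_CAP2_ACK_EN },
		},
	};

	printf("ack enabled: %d\n", nic_ack_enabled(&caps));
	return 0;
}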
Signed-off-by: Miri Korenblit Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230314194113.7ca960596953.Ifc3e816461abbd69c6fd87752342afcedfebc293@changeid Signed-off-by: Johannes Berg --- .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 94 +++++++++++-------- 1 file changed, 57 insertions(+), 37 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index ab02c6076276c..b1f638af4b2c8 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -1996,6 +1996,59 @@ static void iwl_mvm_get_optimal_ppe_info(struct iwl_he_pkt_ext_v2 *pkt_ext, } } +/* + * This function sets the MU EDCA parameters ans returns whether MU EDCA + * is enabled or not + */ +static bool iwl_mvm_set_fw_mu_edca_params(struct iwl_mvm *mvm, + struct iwl_mvm_vif *mvmvif, + struct iwl_he_backoff_conf + *trig_based_txf) +{ + int i; + /* Mark MU EDCA as enabled, unless none detected on some AC */ + bool mu_edca_enabled = true; + + for (i = 0; i < IEEE80211_NUM_ACS; i++) { + struct ieee80211_he_mu_edca_param_ac_rec *mu_edca = + &mvmvif->queue_params[i].mu_edca_param_rec; + u8 ac = iwl_mvm_mac80211_ac_to_ucode_ac(i); + + if (!mvmvif->queue_params[i].mu_edca) { + mu_edca_enabled = false; + break; + } + + trig_based_txf[ac].cwmin = + cpu_to_le16(mu_edca->ecw_min_max & 0xf); + trig_based_txf[ac].cwmax = + cpu_to_le16((mu_edca->ecw_min_max & 0xf0) >> 4); + trig_based_txf[ac].aifsn = + cpu_to_le16(mu_edca->aifsn & 0xf); + trig_based_txf[ac].mu_time = + cpu_to_le16(mu_edca->mu_edca_timer); + } + + return mu_edca_enabled; +} + +static bool iwl_mvm_is_nic_ack_enabled(struct iwl_mvm *mvm, + struct ieee80211_vif *vif) +{ + const struct ieee80211_supported_band *sband; + const struct ieee80211_sta_he_cap *own_he_cap = NULL; + + /* This capability is the same for all bands, + * so take it from one of them. 
+ */ + sband = mvm->hw->wiphy->bands[NL80211_BAND_2GHZ]; + own_he_cap = ieee80211_get_he_iftype_cap(sband, + ieee80211_vif_type_p2p(vif)); + + return (own_he_cap && (own_he_cap->he_cap_elem.mac_cap_info[2] & + IEEE80211_HE_MAC_CAP2_ACK_EN)); +} + static void iwl_mvm_cfg_he_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif, u8 sta_id) { @@ -2015,9 +2068,6 @@ static void iwl_mvm_cfg_he_sta(struct iwl_mvm *mvm, struct ieee80211_sta *sta; u32 flags; int i; - const struct ieee80211_sta_he_cap *own_he_cap = NULL; - struct ieee80211_chanctx_conf *chanctx_conf; - const struct ieee80211_supported_band *sband; void *cmd; u8 nominal_padding; @@ -2045,16 +2095,6 @@ static void iwl_mvm_cfg_he_sta(struct iwl_mvm *mvm, rcu_read_lock(); - chanctx_conf = rcu_dereference(vif->bss_conf.chanctx_conf); - if (WARN_ON(!chanctx_conf)) { - rcu_read_unlock(); - return; - } - - sband = mvm->hw->wiphy->bands[chanctx_conf->def.chan->band]; - own_he_cap = ieee80211_get_he_iftype_cap(sband, - ieee80211_vif_type_p2p(vif)); - sta = rcu_dereference(mvm->fw_id_to_mac_id[sta_ctxt_cmd.sta_id]); if (IS_ERR_OR_NULL(sta)) { rcu_read_unlock(); @@ -2211,28 +2251,9 @@ static void iwl_mvm_cfg_he_sta(struct iwl_mvm *mvm, rcu_read_unlock(); - /* Mark MU EDCA as enabled, unless none detected on some AC */ - flags |= STA_CTXT_HE_MU_EDCA_CW; - for (i = 0; i < IEEE80211_NUM_ACS; i++) { - struct ieee80211_he_mu_edca_param_ac_rec *mu_edca = - &mvmvif->queue_params[i].mu_edca_param_rec; - u8 ac = iwl_mvm_mac80211_ac_to_ucode_ac(i); - - if (!mvmvif->queue_params[i].mu_edca) { - flags &= ~STA_CTXT_HE_MU_EDCA_CW; - break; - } - - sta_ctxt_cmd.trig_based_txf[ac].cwmin = - cpu_to_le16(mu_edca->ecw_min_max & 0xf); - sta_ctxt_cmd.trig_based_txf[ac].cwmax = - cpu_to_le16((mu_edca->ecw_min_max & 0xf0) >> 4); - sta_ctxt_cmd.trig_based_txf[ac].aifsn = - cpu_to_le16(mu_edca->aifsn); - sta_ctxt_cmd.trig_based_txf[ac].mu_time = - cpu_to_le16(mu_edca->mu_edca_timer); - } - + if (iwl_mvm_set_fw_mu_edca_params(mvm, mvmvif, + &sta_ctxt_cmd.trig_based_txf[0])) + flags |= STA_CTXT_HE_MU_EDCA_CW; if (vif->bss_conf.uora_exists) { flags |= STA_CTXT_HE_TRIG_RND_ALLOC; @@ -2243,8 +2264,7 @@ static void iwl_mvm_cfg_he_sta(struct iwl_mvm *mvm, (vif->bss_conf.uora_ocw_range >> 3) & 0x7; } - if (own_he_cap && !(own_he_cap->he_cap_elem.mac_cap_info[2] & - IEEE80211_HE_MAC_CAP2_ACK_EN)) + if (!iwl_mvm_is_nic_ack_enabled(mvm, vif)) flags |= STA_CTXT_HE_NIC_NOT_ACK_ENABLED; if (vif->bss_conf.nontransmitted) { -- GitLab From af6d168f0ec613d55cdd9c4e0443108172577318 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Tue, 14 Mar 2023 19:49:17 +0200 Subject: [PATCH 0456/2078] wifi: iwlwifi: mvm: Refactor MAC_CONTEXT_CMD sending flow Refactor MAC_CONTEXT_CMD sending flow: 1. As the new MLD API is introduced, there are some common fields in both the old and new APIs. The MAC_CONTEXT_CMD of the non-MLD API has common fields with the link and mac commands of the new MLD API. Put this common parts in functions so it can be used later by the new MLD API. 2. Use iwl_mvm_mac_ctxt_send_cmd when removing a mac instead of implementing the same functionality once again. 3. Change the debug print when sending the command to be more specific, so it will be easy to distinguish later if the old or new mac command was sent. 
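For the DTIM handling that moves into the new iwl_mvm_set_fw_dtim_tbtt() helper further down, a small stand-alone sketch of the arithmetic with made-up numbers (1 TU = 1024 us): the DTIM count counts down, so count * beacon_int TU past the last synchronised beacon gives the next DTIM TBTT.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t sync_tsf = 1000000;	/* TSF of the last synced beacon, in us */
	uint32_t sync_dtim_count = 2;	/* beacons left until the DTIM TBTT */
	uint32_t beacon_int = 100;	/* beacon interval, in TU */

	uint32_t dtim_offs = sync_dtim_count * beacon_int;	/* offset in TU */

	dtim_offs *= 1024;					/* TU -> us */

	printf("next DTIM TBTT at TSF %llu (offset %u us)\n",
	       (unsigned long long)(sync_tsf + dtim_offs),
	       (unsigned)dtim_offs);
	return 0;
}

If that point ends up being in the future, the firmware handles it, as the comment carried into the helper notes.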
Signed-off-by: Miri Korenblit Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230314194113.3ab62700db4e.I2e353b308667c215aa456c160e0d90de2b9b85cc@changeid Signed-off-by: Johannes Berg --- .../net/wireless/intel/iwlwifi/mvm/mac-ctxt.c | 320 +++++++++++------- 1 file changed, 197 insertions(+), 123 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index 422550e31bc60..a30385b85a601 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -396,15 +396,45 @@ static void iwl_mvm_ack_rates(struct iwl_mvm *mvm, *ofdm_rates = ofdm; } -static void iwl_mvm_mac_ctxt_set_ht_flags(struct iwl_mvm *mvm, - struct ieee80211_vif *vif, - struct iwl_mac_ctx_cmd *cmd) +static void iwl_mvm_set_fw_basic_rates(struct iwl_mvm *mvm, + struct ieee80211_vif *vif, + __le32 *cck_rates, __le32 *ofdm_rates) +{ + struct ieee80211_chanctx_conf *chanctx; + u8 cck_ack_rates, ofdm_ack_rates; + + rcu_read_lock(); + chanctx = rcu_dereference(vif->bss_conf.chanctx_conf); + iwl_mvm_ack_rates(mvm, vif, chanctx ? chanctx->def.chan->band + : NL80211_BAND_2GHZ, + &cck_ack_rates, &ofdm_ack_rates); + rcu_read_unlock(); + + *cck_rates = cpu_to_le32((u32)cck_ack_rates); + *ofdm_rates = cpu_to_le32((u32)ofdm_ack_rates); +} + +static void iwl_mvm_set_fw_protection_flags(struct iwl_mvm *mvm, + struct ieee80211_vif *vif, + __le32 *protection_flags, + u32 ht_flag, + u32 tgg_flag) { /* for both sta and ap, ht_operation_mode hold the protection_mode */ u8 protection_mode = vif->bss_conf.ht_operation_mode & IEEE80211_HT_OP_MODE_PROTECTION; - /* The fw does not distinguish between ht and fat */ - u32 ht_flag = MAC_PROT_FLG_HT_PROT | MAC_PROT_FLG_FAT_PROT; + bool ht_enabled = !!(vif->bss_conf.ht_operation_mode & + IEEE80211_HT_OP_MODE_PROTECTION); + + if (vif->bss_conf.use_cts_prot) + *protection_flags |= cpu_to_le32(tgg_flag); + + IWL_DEBUG_RATE(mvm, "use_cts_prot %d, ht_operation_mode %d\n", + vif->bss_conf.use_cts_prot, + vif->bss_conf.ht_operation_mode); + + if (!ht_enabled) + return; IWL_DEBUG_RATE(mvm, "protection mode set to %d\n", protection_mode); /* @@ -416,12 +446,12 @@ static void iwl_mvm_mac_ctxt_set_ht_flags(struct iwl_mvm *mvm, break; case IEEE80211_HT_OP_MODE_PROTECTION_NONMEMBER: case IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED: - cmd->protection_flags |= cpu_to_le32(ht_flag); + *protection_flags |= cpu_to_le32(ht_flag); break; case IEEE80211_HT_OP_MODE_PROTECTION_20MHZ: /* Protect when channel wider than 20MHz */ if (vif->bss_conf.chandef.width > NL80211_CHAN_WIDTH_20) - cmd->protection_flags |= cpu_to_le32(ht_flag); + *protection_flags |= cpu_to_le32(ht_flag); break; default: IWL_ERR(mvm, "Illegal protection mode %d\n", @@ -430,6 +460,34 @@ static void iwl_mvm_mac_ctxt_set_ht_flags(struct iwl_mvm *mvm, } } +static void iwl_mvm_set_fw_qos_params(struct iwl_mvm *mvm, + struct ieee80211_vif *vif, + struct iwl_ac_qos *ac, __le32 *qos_flags) +{ + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + int i; + + for (i = 0; i < IEEE80211_NUM_ACS; i++) { + u8 txf = iwl_mvm_mac_ac_to_tx_fifo(mvm, i); + u8 ucode_ac = iwl_mvm_mac80211_ac_to_ucode_ac(i); + + ac[ucode_ac].cw_min = + cpu_to_le16(mvmvif->queue_params[i].cw_min); + ac[ucode_ac].cw_max = + cpu_to_le16(mvmvif->queue_params[i].cw_max); + ac[ucode_ac].edca_txop = + cpu_to_le16(mvmvif->queue_params[i].txop * 32); + ac[ucode_ac].aifsn = mvmvif->queue_params[i].aifs; + ac[ucode_ac].fifos_mask = BIT(txf); + } + + if 
(vif->bss_conf.qos) + *qos_flags |= cpu_to_le32(MAC_QOS_FLG_UPDATE_EDCA); + + if (vif->bss_conf.chandef.width != NL80211_CHAN_WIDTH_20_NOHT) + *qos_flags |= cpu_to_le32(MAC_QOS_FLG_TGN); +} + static int iwl_mvm_get_mac_type(struct ieee80211_vif *vif) { u32 mac_type = FW_MAC_TYPE_BSS_STA; @@ -456,7 +514,6 @@ static int iwl_mvm_get_mac_type(struct ieee80211_vif *vif) default: WARN_ON_ONCE(1); } - return mac_type; } @@ -467,12 +524,8 @@ static void iwl_mvm_mac_ctxt_cmd_common(struct iwl_mvm *mvm, u32 action) { struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); - struct ieee80211_chanctx_conf *chanctx; - bool ht_enabled = !!(vif->bss_conf.ht_operation_mode & - IEEE80211_HT_OP_MODE_PROTECTION); - u8 cck_ack_rates, ofdm_ack_rates; const u8 *bssid = bssid_override ?: vif->bss_conf.bssid; - int i; + u32 ht_flag; cmd->id_and_color = cpu_to_le32(FW_CMD_ID_AND_COLOR(mvmvif->id, mvmvif->color)); @@ -488,15 +541,8 @@ static void iwl_mvm_mac_ctxt_cmd_common(struct iwl_mvm *mvm, else eth_broadcast_addr(cmd->bssid_addr); - rcu_read_lock(); - chanctx = rcu_dereference(vif->bss_conf.chanctx_conf); - iwl_mvm_ack_rates(mvm, vif, chanctx ? chanctx->def.chan->band - : NL80211_BAND_2GHZ, - &cck_ack_rates, &ofdm_ack_rates); - rcu_read_unlock(); - - cmd->cck_rates = cpu_to_le32((u32)cck_ack_rates); - cmd->ofdm_rates = cpu_to_le32((u32)ofdm_ack_rates); + iwl_mvm_set_fw_basic_rates(mvm, vif, &cmd->cck_rates, + &cmd->ofdm_rates); cmd->cck_short_preamble = cpu_to_le32(vif->bss_conf.use_short_preamble ? @@ -507,33 +553,12 @@ static void iwl_mvm_mac_ctxt_cmd_common(struct iwl_mvm *mvm, cmd->filter_flags = 0; - for (i = 0; i < IEEE80211_NUM_ACS; i++) { - u8 txf = iwl_mvm_mac_ac_to_tx_fifo(mvm, i); - u8 ucode_ac = iwl_mvm_mac80211_ac_to_ucode_ac(i); - - cmd->ac[ucode_ac].cw_min = - cpu_to_le16(mvmvif->queue_params[i].cw_min); - cmd->ac[ucode_ac].cw_max = - cpu_to_le16(mvmvif->queue_params[i].cw_max); - cmd->ac[ucode_ac].edca_txop = - cpu_to_le16(mvmvif->queue_params[i].txop * 32); - cmd->ac[ucode_ac].aifsn = mvmvif->queue_params[i].aifs; - cmd->ac[ucode_ac].fifos_mask = BIT(txf); - } - - if (vif->bss_conf.qos) - cmd->qos_flags |= cpu_to_le32(MAC_QOS_FLG_UPDATE_EDCA); + iwl_mvm_set_fw_qos_params(mvm, vif, &cmd->ac[0], &cmd->qos_flags); - if (vif->bss_conf.use_cts_prot) - cmd->protection_flags |= cpu_to_le32(MAC_PROT_FLG_TGG_PROTECT); - - IWL_DEBUG_RATE(mvm, "use_cts_prot %d, ht_operation_mode %d\n", - vif->bss_conf.use_cts_prot, - vif->bss_conf.ht_operation_mode); - if (vif->bss_conf.chandef.width != NL80211_CHAN_WIDTH_20_NOHT) - cmd->qos_flags |= cpu_to_le32(MAC_QOS_FLG_TGN); - if (ht_enabled) - iwl_mvm_mac_ctxt_set_ht_flags(mvm, vif, cmd); + /* The fw does not distinguish between ht and fat */ + ht_flag = MAC_PROT_FLG_HT_PROT | MAC_PROT_FLG_FAT_PROT; + iwl_mvm_set_fw_protection_flags(mvm, vif, &cmd->protection_flags, + ht_flag, MAC_PROT_FLG_TGG_PROTECT); } static int iwl_mvm_mac_ctxt_send_cmd(struct iwl_mvm *mvm, @@ -542,11 +567,76 @@ static int iwl_mvm_mac_ctxt_send_cmd(struct iwl_mvm *mvm, int ret = iwl_mvm_send_cmd_pdu(mvm, MAC_CONTEXT_CMD, 0, sizeof(*cmd), cmd); if (ret) - IWL_ERR(mvm, "Failed to send MAC context (action:%d): %d\n", + IWL_ERR(mvm, "Failed to send MAC_CONTEXT_CMD (action:%d): %d\n", le32_to_cpu(cmd->action), ret); return ret; } +static void iwl_mvm_set_fw_dtim_tbtt(struct iwl_mvm *mvm, + struct ieee80211_vif *vif, + __le64 *dtim_tsf, __le32 *dtim_time, + __le32 *assoc_beacon_arrive_time) +{ + u32 dtim_offs; + + /* + * The DTIM count counts down, so when it is N that means N + * more beacon 
intervals happen until the DTIM TBTT. Therefore + * add this to the current time. If that ends up being in the + * future, the firmware will handle it. + * + * Also note that the system_timestamp (which we get here as + * "sync_device_ts") and TSF timestamp aren't at exactly the + * same offset in the frame -- the TSF is at the first symbol + * of the TSF, the system timestamp is at signal acquisition + * time. This means there's an offset between them of at most + * a few hundred microseconds (24 * 8 bits + PLCP time gives + * 384us in the longest case), this is currently not relevant + * as the firmware wakes up around 2ms before the TBTT. + */ + dtim_offs = vif->bss_conf.sync_dtim_count * + vif->bss_conf.beacon_int; + /* convert TU to usecs */ + dtim_offs *= 1024; + + *dtim_tsf = + cpu_to_le64(vif->bss_conf.sync_tsf + dtim_offs); + *dtim_time = + cpu_to_le32(vif->bss_conf.sync_device_ts + dtim_offs); + *assoc_beacon_arrive_time = + cpu_to_le32(vif->bss_conf.sync_device_ts); + + IWL_DEBUG_INFO(mvm, "DTIM TBTT is 0x%llx/0x%x, offset %d\n", + le64_to_cpu(*dtim_tsf), + le32_to_cpu(*dtim_time), + dtim_offs); +} + +static __le32 iwl_mvm_mac_ctxt_cmd_p2p_sta_get_oppps_ctwin(struct iwl_mvm *mvm, + struct ieee80211_vif *vif) +{ + struct ieee80211_p2p_noa_attr *noa = + &vif->bss_conf.p2p_noa_attr; + + return cpu_to_le32(noa->oppps_ctwindow & + IEEE80211_P2P_OPPPS_CTWINDOW_MASK); +} + +static __le32 iwl_mvm_mac_ctxt_cmd_sta_get_twt_policy(struct iwl_mvm *mvm, + struct ieee80211_vif *vif) +{ + __le32 twt_policy = cpu_to_le32(0); + + if (vif->bss_conf.twt_requester && IWL_MVM_USE_TWT) + twt_policy |= cpu_to_le32(TWT_SUPPORTED); + if (vif->bss_conf.twt_protected) + twt_policy |= cpu_to_le32(PROTECTED_TWT_SUPPORTED); + if (vif->bss_conf.twt_broadcast) + twt_policy |= cpu_to_le32(BROADCAST_TWT_SUPPORTED); + + return twt_policy; +} + static int iwl_mvm_mac_ctxt_cmd_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif, u32 action, bool force_assoc_off, @@ -567,11 +657,9 @@ static int iwl_mvm_mac_ctxt_cmd_sta(struct iwl_mvm *mvm, cmd.filter_flags |= cpu_to_le32(MAC_FILTER_ACCEPT_GRP); if (vif->p2p) { - struct ieee80211_p2p_noa_attr *noa = - &vif->bss_conf.p2p_noa_attr; + cmd.p2p_sta.ctwin = + iwl_mvm_mac_ctxt_cmd_p2p_sta_get_oppps_ctwin(mvm, vif); - cmd.p2p_sta.ctwin = cpu_to_le32(noa->oppps_ctwindow & - IEEE80211_P2P_OPPPS_CTWINDOW_MASK); ctxt_sta = &cmd.p2p_sta.sta; } else { ctxt_sta = &cmd.sta; @@ -581,39 +669,10 @@ static int iwl_mvm_mac_ctxt_cmd_sta(struct iwl_mvm *mvm, if (vif->cfg.assoc && vif->bss_conf.dtim_period && !force_assoc_off) { struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); - u32 dtim_offs; - /* - * The DTIM count counts down, so when it is N that means N - * more beacon intervals happen until the DTIM TBTT. Therefore - * add this to the current time. If that ends up being in the - * future, the firmware will handle it. - * - * Also note that the system_timestamp (which we get here as - * "sync_device_ts") and TSF timestamp aren't at exactly the - * same offset in the frame -- the TSF is at the first symbol - * of the TSF, the system timestamp is at signal acquisition - * time. This means there's an offset between them of at most - * a few hundred microseconds (24 * 8 bits + PLCP time gives - * 384us in the longest case), this is currently not relevant - * as the firmware wakes up around 2ms before the TBTT. 
- */ - dtim_offs = vif->bss_conf.sync_dtim_count * - vif->bss_conf.beacon_int; - /* convert TU to usecs */ - dtim_offs *= 1024; - - ctxt_sta->dtim_tsf = - cpu_to_le64(vif->bss_conf.sync_tsf + dtim_offs); - ctxt_sta->dtim_time = - cpu_to_le32(vif->bss_conf.sync_device_ts + dtim_offs); - ctxt_sta->assoc_beacon_arrive_time = - cpu_to_le32(vif->bss_conf.sync_device_ts); - - IWL_DEBUG_INFO(mvm, "DTIM TBTT is 0x%llx/0x%x, offset %d\n", - le64_to_cpu(ctxt_sta->dtim_tsf), - le32_to_cpu(ctxt_sta->dtim_time), - dtim_offs); + iwl_mvm_set_fw_dtim_tbtt(mvm, vif, &ctxt_sta->dtim_tsf, + &ctxt_sta->dtim_time, + &ctxt_sta->assoc_beacon_arrive_time); ctxt_sta->is_assoc = cpu_to_le32(1); @@ -643,14 +702,8 @@ static int iwl_mvm_mac_ctxt_cmd_sta(struct iwl_mvm *mvm, if (vif->bss_conf.he_support && !iwlwifi_mod_params.disable_11ax) { cmd.filter_flags |= cpu_to_le32(MAC_FILTER_IN_11AX); - if (vif->bss_conf.twt_requester && IWL_MVM_USE_TWT) - ctxt_sta->data_policy |= cpu_to_le32(TWT_SUPPORTED); - if (vif->bss_conf.twt_protected) - ctxt_sta->data_policy |= - cpu_to_le32(PROTECTED_TWT_SUPPORTED); - if (vif->bss_conf.twt_broadcast) - ctxt_sta->data_policy |= - cpu_to_le32(BROADCAST_TWT_SUPPORTED); + ctxt_sta->data_policy |= + iwl_mvm_mac_ctxt_cmd_sta_get_twt_policy(mvm, vif); } @@ -732,20 +785,11 @@ static void iwl_mvm_go_iterator(void *_data, u8 *mac, struct ieee80211_vif *vif) data->go_active = true; } -static int iwl_mvm_mac_ctxt_cmd_p2p_device(struct iwl_mvm *mvm, - struct ieee80211_vif *vif, - u32 action) +static __le32 iwl_mac_ctxt_p2p_dev_has_extended_disc(struct iwl_mvm *mvm, + struct ieee80211_vif *vif) { - struct iwl_mac_ctx_cmd cmd = {}; struct iwl_mvm_go_iterator_data data = {}; - WARN_ON(vif->type != NL80211_IFTYPE_P2P_DEVICE); - - iwl_mvm_mac_ctxt_cmd_common(mvm, vif, &cmd, NULL, action); - - /* Override the filter flags to accept only probe requests */ - cmd.filter_flags = cpu_to_le32(MAC_FILTER_IN_PROBE_REQUEST); - /* * This flag should be set to true when the P2P Device is * discoverable and there is at least another active P2P GO. Settings @@ -758,7 +802,25 @@ static int iwl_mvm_mac_ctxt_cmd_p2p_device(struct iwl_mvm *mvm, mvm->hw, IEEE80211_IFACE_ITER_RESUME_ALL, iwl_mvm_go_iterator, &data); - cmd.p2p_dev.is_disc_extended = cpu_to_le32(data.go_active ? 1 : 0); + return cpu_to_le32(data.go_active ? 1 : 0); +} + +static int iwl_mvm_mac_ctxt_cmd_p2p_device(struct iwl_mvm *mvm, + struct ieee80211_vif *vif, + u32 action) +{ + struct iwl_mac_ctx_cmd cmd = {}; + + WARN_ON(vif->type != NL80211_IFTYPE_P2P_DEVICE); + + iwl_mvm_mac_ctxt_cmd_common(mvm, vif, &cmd, NULL, action); + + cmd.p2p_dev.is_disc_extended = + iwl_mac_ctxt_p2p_dev_has_extended_disc(mvm, vif); + + /* Override the filter flags to accept only probe requests */ + cmd.filter_flags = cpu_to_le32(MAC_FILTER_IN_PROBE_REQUEST); + return iwl_mvm_mac_ctxt_send_cmd(mvm, &cmd); } @@ -1102,6 +1164,30 @@ static void iwl_mvm_mac_ap_iterator(void *_data, u8 *mac, data->beacon_int = vif->bss_conf.beacon_int; } +/* + * Fill the filter flags for mac context of type AP or P2P GO. + */ +static void iwl_mvm_mac_ctxt_cmd_ap_set_filter_flags(struct iwl_mvm *mvm, + struct iwl_mvm_vif *mvmvif, + __le32 *filter_flags, + int accept_probe_req_flag, + int accept_beacon_flag) +{ + /* + * in AP mode, pass probe requests and beacons from other APs + * (needed for ht protection); when there're no any associated + * station don't ask FW to pass beacons to prevent unnecessary + * wake-ups. 
+ */ + *filter_flags |= cpu_to_le32(accept_probe_req_flag); + if (mvmvif->ap_assoc_sta_count || !mvm->drop_bcn_ap_mode) { + *filter_flags |= cpu_to_le32(accept_beacon_flag); + IWL_DEBUG_HC(mvm, "Asking FW to pass beacons\n"); + } else { + IWL_DEBUG_HC(mvm, "No need to receive beacons\n"); + } +} + /* * Fill the specific data for mac context of type AP of P2P GO */ @@ -1121,19 +1207,10 @@ static void iwl_mvm_mac_ctxt_cmd_fill_ap(struct iwl_mvm *mvm, /* in AP mode, the MCAST FIFO takes the EDCA params from VO */ cmd->ac[IWL_MVM_TX_FIFO_VO].fifos_mask |= BIT(IWL_MVM_TX_FIFO_MCAST); - /* - * in AP mode, pass probe requests and beacons from other APs - * (needed for ht protection); when there're no any associated - * station don't ask FW to pass beacons to prevent unnecessary - * wake-ups. - */ - cmd->filter_flags |= cpu_to_le32(MAC_FILTER_IN_PROBE_REQUEST); - if (mvmvif->ap_assoc_sta_count || !mvm->drop_bcn_ap_mode) { - cmd->filter_flags |= cpu_to_le32(MAC_FILTER_IN_BEACON); - IWL_DEBUG_HC(mvm, "Asking FW to pass beacons\n"); - } else { - IWL_DEBUG_HC(mvm, "No need to receive beacons\n"); - } + iwl_mvm_mac_ctxt_cmd_ap_set_filter_flags(mvm, mvmvif, + &cmd->filter_flags, + MAC_FILTER_IN_PROBE_REQUEST, + MAC_FILTER_IN_BEACON); ctxt_ap->bi = cpu_to_le32(vif->bss_conf.beacon_int); ctxt_ap->dtim_interval = cpu_to_le32(vif->bss_conf.beacon_int * @@ -1295,12 +1372,9 @@ int iwl_mvm_mac_ctxt_remove(struct iwl_mvm *mvm, struct ieee80211_vif *vif) mvmvif->color)); cmd.action = cpu_to_le32(FW_CTXT_ACTION_REMOVE); - ret = iwl_mvm_send_cmd_pdu(mvm, MAC_CONTEXT_CMD, 0, - sizeof(cmd), &cmd); - if (ret) { - IWL_ERR(mvm, "Failed to remove MAC context: %d\n", ret); + ret = iwl_mvm_mac_ctxt_send_cmd(mvm, &cmd); + if (ret) return ret; - } mvmvif->uploaded = false; -- GitLab From 9be162a7b670b7e8dbada3c139d77961f457a34d Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Tue, 14 Mar 2023 19:49:18 +0200 Subject: [PATCH 0457/2078] wifi: iwlwifi: mvm: add support for the new MAC CTXT command As a part of the new MLD FW API changes, we have a new MAC CTXT command. Add structures and enum definitions, along with the functions that sends this command (i.e. add, remove and change mac ctxt). This functions will be in used in the next patches. 
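A rough stand-alone sketch of the layout pattern the new command follows: a common header plus a union whose active member depends on the interface type, so one command covers client, GO/IBSS and P2P-device MACs. The field names and sizes below are invented for the example and do not match the firmware layout:

#include <stdint.h>
#include <stdio.h>

enum mac_type { MAC_TYPE_BSS_STA, MAC_TYPE_GO, MAC_TYPE_P2P_DEV };

struct example_mac_cmd {
	/* common header, filled for every interface type */
	uint32_t id_and_color;
	uint32_t action;
	uint32_t mac_type;
	uint32_t filter_flags;
	/* per-type payload */
	union {
		struct { uint32_t is_assoc; uint32_t assoc_id; } client;
		struct { uint32_t beacon_template; } go_ibss;
		struct { uint32_t is_disc_extended; } p2p_dev;
	};
};

int main(void)
{
	struct example_mac_cmd cmd = {
		.mac_type = MAC_TYPE_BSS_STA,
		.client = { .is_assoc = 1, .assoc_id = 7 },
	};

	printf("type=%u assoc_id=%u size=%zu bytes\n",
	       (unsigned)cmd.mac_type, (unsigned)cmd.client.assoc_id,
	       sizeof(cmd));
	return 0;
}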
Signed-off-by: Miri Korenblit Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230314194113.99a41a5bc55f.I310312c829f5f2f69b64fcddce487b1eab80165b@changeid Signed-off-by: Johannes Berg --- .../wireless/intel/iwlwifi/fw/api/mac-cfg.h | 109 ++++++- .../net/wireless/intel/iwlwifi/mvm/Makefile | 2 +- .../net/wireless/intel/iwlwifi/mvm/mac-ctxt.c | 24 +- .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 3 +- .../net/wireless/intel/iwlwifi/mvm/mld-mac.c | 271 ++++++++++++++++++ drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 18 ++ drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 1 + 7 files changed, 411 insertions(+), 17 deletions(-) create mode 100644 drivers/net/wireless/intel/iwlwifi/mvm/mld-mac.c diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h b/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h index 712532f176308..f12b2a25cd3d2 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h @@ -1,12 +1,14 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* - * Copyright (C) 2012-2014, 2018-2019, 2021 Intel Corporation + * Copyright (C) 2012-2014, 2018-2019, 2021-2022 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2016-2017 Intel Deutschland GmbH */ #ifndef __iwl_fw_api_mac_cfg_h__ #define __iwl_fw_api_mac_cfg_h__ +#include "mac.h" + /** * enum iwl_mac_conf_subcmd_ids - mac configuration command IDs */ @@ -31,7 +33,10 @@ enum iwl_mac_conf_subcmd_ids { * @CANCEL_CHANNEL_SWITCH_CMD: &struct iwl_cancel_channel_switch_cmd */ CANCEL_CHANNEL_SWITCH_CMD = 0x6, - + /** + * @MAC_CONFIG_CMD: &struct iwl_mac_config_cmd + */ + MAC_CONFIG_CMD = 0x8, /** * @SESSION_PROTECTION_NOTIF: &struct iwl_mvm_session_prot_notif */ @@ -182,4 +187,104 @@ struct iwl_mac_low_latency_cmd { __le16 reserved; } __packed; /* MAC_LOW_LATENCY_API_S_VER_1 */ +/** + * struct iwl_mac_client_data - configuration data for client MAC context + * + * @is_assoc: 1 for associated state, 0 otherwise + * @assoc_id: unique ID assigned by the AP during association + * @data_policy: see &enum iwl_mac_data_policy + * @ctwin: client traffic window in TU (period after TBTT when GO is present). + * 0 indicates that there is no CT window. + */ +struct iwl_mac_client_data { + __le32 is_assoc; + __le32 assoc_id; + __le32 data_policy; + __le32 ctwin; +} __packed; /* MAC_CONTEXT_CONFIG_CLIENT_DATA_API_S_VER_1 */ + +/** + * struct iwl_mac_go_ibss_data - configuration data for GO and IBSS MAC context + * + * @beacon_template: beacon template ID + */ +struct iwl_mac_go_ibss_data { + __le32 beacon_template; +} __packed; /* MAC_CONTEXT_CONFIG_GO_IBSS_DATA_API_S_VER_1 */ + +/** + * struct iwl_mac_p2p_dev_data - configuration data for P2P device MAC context + * + * @is_disc_extended: if set to true, P2P Device discoverability is enabled on + * other channels as well. This should be to true only in case that the + * device is discoverable and there is an active GO. Note that setting this + * field when not needed, will increase the number of interrupts and have + * effect on the platform power, as this setting opens the Rx filters on + * all macs. 
+ */ +struct iwl_mac_p2p_dev_data { + __le32 is_disc_extended; +} __packed; /* MAC_CONTEXT_CONFIG_P2P_DEV_DATA_API_S_VER_1 */ + +/** + * enum iwl_mac_config_filter_flags - MAC context configuration filter flags + * + * @MAC_CFG_FILTER_PROMISC: accept all data frames + * @MAC_CFG_FILTER_ACCEPT_CONTROL_AND_MGMT: pass all management and + * control frames to the host + * @MAC_CFG_FILTER_ACCEPT_GRP: accept multicast frames + * @MAC_CFG_FILTER_ACCEPT_BEACON: accept beacon frames + * @MAC_CFG_FILTER_ACCEPT_BCAST_PROBE_RESP: accept broadcast probe response + * @MAC_CFG_FILTER_ACCEPT_PROBE_REQ: accept probe requests + */ +enum iwl_mac_config_filter_flags { + MAC_CFG_FILTER_PROMISC = BIT(0), + MAC_CFG_FILTER_ACCEPT_CONTROL_AND_MGMT = BIT(1), + MAC_CFG_FILTER_ACCEPT_GRP = BIT(2), + MAC_CFG_FILTER_ACCEPT_BEACON = BIT(3), + MAC_CFG_FILTER_ACCEPT_BCAST_PROBE_RESP = BIT(4), + MAC_CFG_FILTER_ACCEPT_PROBE_REQ = BIT(5), +}; /* MAC_FILTER_FLAGS_MASK_E_VER_1 */ + +/** + * struct iwl_mac_config_cmd - command structure to configure MAC contexts in + * MLD API + * ( MAC_CONTEXT_CONFIG_CMD = 0x8 ) + * + * @id_and_color: ID and color of the MAC + * @action: action to perform, one of FW_CTXT_ACTION_* + * @mac_type: one of &enum iwl_mac_types + * @local_mld_addr: mld address + * @reserved_for_local_mld_addr: reserved + * @filter_flags: combination of &enum iwl_mac_config_filter_flags + * @he_support: does this MAC support HE + * @eht_support: does this MAC support EHT. Requires he_support + * @nic_not_ack_enabled: mark that the NIC doesn't support receiving + * ACK-enabled AGG, (i.e. both BACK and non-BACK frames in single AGG). + * If the NIC is not ACK_ENABLED it may use the EOF-bit in first non-0 + * len delim to determine if AGG or single. + * @client: client mac data + * @go_ibss: mac data for go or ibss + * @p2p_dev: mac data for p2p device + */ +struct iwl_mac_config_cmd { + /* COMMON_INDEX_HDR_API_S_VER_1 */ + __le32 id_and_color; + __le32 action; + /* MAC_CONTEXT_TYPE_API_E */ + __le32 mac_type; + u8 local_mld_addr[6]; + __le16 reserved_for_local_mld_addr; + __le32 filter_flags; + __le32 he_support; + __le32 eht_support; + __le32 nic_not_ack_enabled; + /* MAC_CONTEXT_CONFIG_SPECIFIC_DATA_API_U_VER_1 */ + union { + struct iwl_mac_client_data client; + struct iwl_mac_go_ibss_data go_ibss; + struct iwl_mac_p2p_dev_data p2p_dev; + }; +} __packed; /* MAC_CONTEXT_CONFIG_CMD_API_S_VER_1 */ + #endif /* __iwl_fw_api_mac_cfg_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/Makefile b/drivers/net/wireless/intel/iwlwifi/mvm/Makefile index b28fcf0cf9cf4..d71f0a99b7c45 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/Makefile +++ b/drivers/net/wireless/intel/iwlwifi/mvm/Makefile @@ -7,7 +7,7 @@ iwlmvm-y += power.o coex.o iwlmvm-y += tt.o offloading.o tdls.o iwlmvm-y += ftm-responder.o ftm-initiator.o iwlmvm-y += rfi.o -iwlmvm-y += mld-key.o +iwlmvm-y += mld-key.o mld-mac.o iwlmvm-$(CONFIG_IWLWIFI_DEBUGFS) += debugfs.o debugfs-vif.o iwlmvm-$(CONFIG_IWLWIFI_LEDS) += led.o iwlmvm-$(CONFIG_PM) += d3.o diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index a30385b85a601..b21327ec44016 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -488,7 +488,7 @@ static void iwl_mvm_set_fw_qos_params(struct iwl_mvm *mvm, *qos_flags |= cpu_to_le32(MAC_QOS_FLG_TGN); } -static int iwl_mvm_get_mac_type(struct ieee80211_vif *vif) +int iwl_mvm_get_mac_type(struct ieee80211_vif *vif) { u32 
mac_type = FW_MAC_TYPE_BSS_STA; @@ -612,8 +612,8 @@ static void iwl_mvm_set_fw_dtim_tbtt(struct iwl_mvm *mvm, dtim_offs); } -static __le32 iwl_mvm_mac_ctxt_cmd_p2p_sta_get_oppps_ctwin(struct iwl_mvm *mvm, - struct ieee80211_vif *vif) +__le32 iwl_mvm_mac_ctxt_cmd_p2p_sta_get_oppps_ctwin(struct iwl_mvm *mvm, + struct ieee80211_vif *vif) { struct ieee80211_p2p_noa_attr *noa = &vif->bss_conf.p2p_noa_attr; @@ -622,8 +622,8 @@ static __le32 iwl_mvm_mac_ctxt_cmd_p2p_sta_get_oppps_ctwin(struct iwl_mvm *mvm, IEEE80211_P2P_OPPPS_CTWINDOW_MASK); } -static __le32 iwl_mvm_mac_ctxt_cmd_sta_get_twt_policy(struct iwl_mvm *mvm, - struct ieee80211_vif *vif) +__le32 iwl_mvm_mac_ctxt_cmd_sta_get_twt_policy(struct iwl_mvm *mvm, + struct ieee80211_vif *vif) { __le32 twt_policy = cpu_to_le32(0); @@ -785,8 +785,8 @@ static void iwl_mvm_go_iterator(void *_data, u8 *mac, struct ieee80211_vif *vif) data->go_active = true; } -static __le32 iwl_mac_ctxt_p2p_dev_has_extended_disc(struct iwl_mvm *mvm, - struct ieee80211_vif *vif) +__le32 iwl_mac_ctxt_p2p_dev_has_extended_disc(struct iwl_mvm *mvm, + struct ieee80211_vif *vif) { struct iwl_mvm_go_iterator_data data = {}; @@ -1167,11 +1167,11 @@ static void iwl_mvm_mac_ap_iterator(void *_data, u8 *mac, /* * Fill the filter flags for mac context of type AP or P2P GO. */ -static void iwl_mvm_mac_ctxt_cmd_ap_set_filter_flags(struct iwl_mvm *mvm, - struct iwl_mvm_vif *mvmvif, - __le32 *filter_flags, - int accept_probe_req_flag, - int accept_beacon_flag) +void iwl_mvm_mac_ctxt_cmd_ap_set_filter_flags(struct iwl_mvm *mvm, + struct iwl_mvm_vif *mvmvif, + __le32 *filter_flags, + int accept_probe_req_flag, + int accept_beacon_flag) { /* * in AP mode, pass probe requests and beacons from other APs diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index b1f638af4b2c8..210b134c735fb 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -2032,8 +2032,7 @@ static bool iwl_mvm_set_fw_mu_edca_params(struct iwl_mvm *mvm, return mu_edca_enabled; } -static bool iwl_mvm_is_nic_ack_enabled(struct iwl_mvm *mvm, - struct ieee80211_vif *vif) +bool iwl_mvm_is_nic_ack_enabled(struct iwl_mvm *mvm, struct ieee80211_vif *vif) { const struct ieee80211_supported_band *sband; const struct ieee80211_sta_he_cap *own_he_cap = NULL; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac.c new file mode 100644 index 0000000000000..240f51d2fd536 --- /dev/null +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac.c @@ -0,0 +1,271 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +/* + * Copyright (C) 2022 Intel Corporation + */ +#include "mvm.h" + +static void iwl_mvm_mld_mac_ctxt_cmd_common(struct iwl_mvm *mvm, + struct ieee80211_vif *vif, + struct iwl_mac_config_cmd *cmd, + u32 action) +{ + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + + cmd->id_and_color = cpu_to_le32(mvmvif->id); + cmd->action = cpu_to_le32(action); + + cmd->mac_type = cpu_to_le32(iwl_mvm_get_mac_type(vif)); + + memcpy(cmd->local_mld_addr, vif->addr, ETH_ALEN); + + cmd->filter_flags = cpu_to_le32(0); + cmd->he_support = cpu_to_le32(0); + cmd->eht_support = cpu_to_le32(0); + + cmd->nic_not_ack_enabled = + cpu_to_le32(!iwl_mvm_is_nic_ack_enabled(mvm, vif)); + + if (iwlwifi_mod_params.disable_11ax) + return; + cmd->he_support = cpu_to_le32(vif->bss_conf.he_support); + + if (!iwlwifi_mod_params.disable_11be && cmd->he_support) + 
cmd->eht_support = cpu_to_le32(vif->bss_conf.eht_support); +} + +static int iwl_mvm_mld_mac_ctxt_send_cmd(struct iwl_mvm *mvm, + struct iwl_mac_config_cmd *cmd) +{ + int ret = iwl_mvm_send_cmd_pdu(mvm, + WIDE_ID(MAC_CONF_GROUP, MAC_CONFIG_CMD), + 0, sizeof(*cmd), cmd); + if (ret) + IWL_ERR(mvm, "Failed to send MAC_CONFIG_CMD (action:%d): %d\n", + le32_to_cpu(cmd->action), ret); + return ret; +} + +static int iwl_mvm_mld_mac_ctxt_cmd_sta(struct iwl_mvm *mvm, + struct ieee80211_vif *vif, + u32 action, bool force_assoc_off) +{ + struct iwl_mac_config_cmd cmd = {}; + + WARN_ON(vif->type != NL80211_IFTYPE_STATION); + + /* Fill the common data for all mac context types */ + iwl_mvm_mld_mac_ctxt_cmd_common(mvm, vif, &cmd, action); + + /* + * We always want to hear MCAST frames, if we're not authorized yet, + * we'll drop them. + */ + cmd.filter_flags |= cpu_to_le32(MAC_CFG_FILTER_ACCEPT_GRP); + + if (vif->p2p) + cmd.client.ctwin = + iwl_mvm_mac_ctxt_cmd_p2p_sta_get_oppps_ctwin(mvm, vif); + + if (vif->cfg.assoc && vif->bss_conf.dtim_period && + !force_assoc_off) { + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + + cmd.client.is_assoc = cpu_to_le32(1); + + if (!mvmvif->authorized && + fw_has_capa(&mvm->fw->ucode_capa, + IWL_UCODE_TLV_CAPA_COEX_HIGH_PRIO)) + cmd.client.data_policy |= + cpu_to_le32(COEX_HIGH_PRIORITY_ENABLE); + + } else { + cmd.client.is_assoc = cpu_to_le32(0); + + /* Allow beacons to pass through as long as we are not + * associated, or we do not have dtim period information. + */ + cmd.filter_flags |= cpu_to_le32(MAC_CFG_FILTER_ACCEPT_BEACON); + } + + cmd.client.assoc_id = cpu_to_le32(vif->cfg.aid); + + if (vif->probe_req_reg && vif->cfg.assoc && vif->p2p) + cmd.filter_flags |= cpu_to_le32(MAC_CFG_FILTER_ACCEPT_PROBE_REQ); + + if (vif->bss_conf.he_support && !iwlwifi_mod_params.disable_11ax) + cmd.client.data_policy |= + iwl_mvm_mac_ctxt_cmd_sta_get_twt_policy(mvm, vif); + + return iwl_mvm_mld_mac_ctxt_send_cmd(mvm, &cmd); +} + +static int iwl_mvm_mld_mac_ctxt_cmd_listener(struct iwl_mvm *mvm, + struct ieee80211_vif *vif, + u32 action) +{ + struct iwl_mac_config_cmd cmd = {}; + + WARN_ON(vif->type != NL80211_IFTYPE_MONITOR); + + iwl_mvm_mld_mac_ctxt_cmd_common(mvm, vif, &cmd, action); + + cmd.filter_flags = cpu_to_le32(MAC_CFG_FILTER_PROMISC | + MAC_FILTER_IN_CONTROL_AND_MGMT | + MAC_CFG_FILTER_ACCEPT_BEACON | + MAC_CFG_FILTER_ACCEPT_PROBE_REQ | + MAC_CFG_FILTER_ACCEPT_GRP); + + return iwl_mvm_mld_mac_ctxt_send_cmd(mvm, &cmd); +} + +static int iwl_mvm_mld_mac_ctxt_cmd_ibss(struct iwl_mvm *mvm, + struct ieee80211_vif *vif, + u32 action) +{ + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + struct iwl_mac_config_cmd cmd = {}; + + WARN_ON(vif->type != NL80211_IFTYPE_ADHOC); + + iwl_mvm_mld_mac_ctxt_cmd_common(mvm, vif, &cmd, action); + + cmd.filter_flags = cpu_to_le32(MAC_CFG_FILTER_ACCEPT_BEACON | + MAC_CFG_FILTER_ACCEPT_PROBE_REQ | + MAC_CFG_FILTER_ACCEPT_GRP); + + /* TODO: Assumes that the beacon id == mac context id */ + cmd.go_ibss.beacon_template = cpu_to_le32(mvmvif->id); + + return iwl_mvm_mld_mac_ctxt_send_cmd(mvm, &cmd); +} + +static int iwl_mvm_mld_mac_ctxt_cmd_p2p_device(struct iwl_mvm *mvm, + struct ieee80211_vif *vif, + u32 action) +{ + struct iwl_mac_config_cmd cmd = {}; + + WARN_ON(vif->type != NL80211_IFTYPE_P2P_DEVICE); + + iwl_mvm_mld_mac_ctxt_cmd_common(mvm, vif, &cmd, action); + + cmd.p2p_dev.is_disc_extended = + iwl_mac_ctxt_p2p_dev_has_extended_disc(mvm, vif); + + /* Override the filter flags to accept only probe requests */ + 
cmd.filter_flags = cpu_to_le32(MAC_CFG_FILTER_ACCEPT_PROBE_REQ); + + return iwl_mvm_mld_mac_ctxt_send_cmd(mvm, &cmd); +} + +static int iwl_mvm_mld_mac_ctxt_cmd_ap_go(struct iwl_mvm *mvm, + struct ieee80211_vif *vif, + u32 action) +{ + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + struct iwl_mac_config_cmd cmd = {}; + + WARN_ON(vif->type != NL80211_IFTYPE_AP); + + /* Fill the common data for all mac context types */ + iwl_mvm_mld_mac_ctxt_cmd_common(mvm, vif, &cmd, action); + + iwl_mvm_mac_ctxt_cmd_ap_set_filter_flags(mvm, mvmvif, + &cmd.filter_flags, + MAC_CFG_FILTER_ACCEPT_PROBE_REQ, + MAC_CFG_FILTER_ACCEPT_BEACON); + + /* TODO: Assume that the beacon id == mac context id */ + cmd.go_ibss.beacon_template = cpu_to_le32(mvmvif->id); + + return iwl_mvm_mld_mac_ctxt_send_cmd(mvm, &cmd); +} + +static int iwl_mvm_mld_mac_ctx_send(struct iwl_mvm *mvm, + struct ieee80211_vif *vif, + u32 action, bool force_assoc_off) +{ + switch (vif->type) { + case NL80211_IFTYPE_STATION: + return iwl_mvm_mld_mac_ctxt_cmd_sta(mvm, vif, action, + force_assoc_off); + case NL80211_IFTYPE_AP: + return iwl_mvm_mld_mac_ctxt_cmd_ap_go(mvm, vif, action); + case NL80211_IFTYPE_MONITOR: + return iwl_mvm_mld_mac_ctxt_cmd_listener(mvm, vif, action); + case NL80211_IFTYPE_P2P_DEVICE: + return iwl_mvm_mld_mac_ctxt_cmd_p2p_device(mvm, vif, action); + case NL80211_IFTYPE_ADHOC: + return iwl_mvm_mld_mac_ctxt_cmd_ibss(mvm, vif, action); + default: + break; + } + + return -EOPNOTSUPP; +} + +int iwl_mvm_mld_mac_ctxt_add(struct iwl_mvm *mvm, struct ieee80211_vif *vif) +{ + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + int ret; + + if (WARN_ON_ONCE(vif->type == NL80211_IFTYPE_NAN)) + return -EOPNOTSUPP; + + if (WARN_ONCE(mvmvif->uploaded, "Adding active MAC %pM/%d\n", + vif->addr, ieee80211_vif_type_p2p(vif))) + return -EIO; + + ret = iwl_mvm_mld_mac_ctx_send(mvm, vif, FW_CTXT_ACTION_ADD, + true); + if (ret) + return ret; + + /* will only do anything at resume from D3 time */ + iwl_mvm_set_last_nonqos_seq(mvm, vif); + + mvmvif->uploaded = true; + return 0; +} + +int iwl_mvm_mld_mac_ctxt_changed(struct iwl_mvm *mvm, + struct ieee80211_vif *vif, + bool force_assoc_off) +{ + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + + if (WARN_ON_ONCE(vif->type == NL80211_IFTYPE_NAN)) + return -EOPNOTSUPP; + + if (WARN_ONCE(!mvmvif->uploaded, "Changing inactive MAC %pM/%d\n", + vif->addr, ieee80211_vif_type_p2p(vif))) + return -EIO; + + return iwl_mvm_mld_mac_ctx_send(mvm, vif, FW_CTXT_ACTION_MODIFY, + force_assoc_off); +} + +int iwl_mvm_mld_mac_ctxt_remove(struct iwl_mvm *mvm, struct ieee80211_vif *vif) +{ + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + struct iwl_mac_config_cmd cmd = { + .action = cpu_to_le32(FW_CTXT_ACTION_REMOVE), + .id_and_color = cpu_to_le32(mvmvif->id), + }; + int ret; + + if (WARN_ON_ONCE(vif->type == NL80211_IFTYPE_NAN)) + return -EOPNOTSUPP; + + if (WARN_ONCE(!mvmvif->uploaded, "Removing inactive MAC %pM/%d\n", + vif->addr, ieee80211_vif_type_p2p(vif))) + return -EIO; + + ret = iwl_mvm_mld_mac_ctxt_send_cmd(mvm, &cmd); + if (ret) + return ret; + + mvmvif->uploaded = false; + + return 0; +} diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 6bd1a4c72a129..1287207a34b3b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -1477,6 +1477,7 @@ void iwl_mvm_hwrate_to_tx_rate_v1(u32 rate_n_flags, struct ieee80211_tx_rate *r); u8 
iwl_mvm_mac80211_idx_to_hwrate(const struct iwl_fw *fw, int rate_idx); u8 iwl_mvm_mac80211_ac_to_ucode_ac(enum ieee80211_ac_numbers ac); +bool iwl_mvm_is_nic_ack_enabled(struct iwl_mvm *mvm, struct ieee80211_vif *vif); static inline void iwl_mvm_dump_nic_error_log(struct iwl_mvm *mvm) { @@ -1638,6 +1639,23 @@ u8 iwl_mvm_get_channel_width(struct cfg80211_chan_def *chandef); u8 iwl_mvm_get_ctrl_pos(struct cfg80211_chan_def *chandef); /* MAC (virtual interface) programming */ + +__le32 iwl_mac_ctxt_p2p_dev_has_extended_disc(struct iwl_mvm *mvm, + struct ieee80211_vif *vif); +void iwl_mvm_mac_ctxt_cmd_ap_set_filter_flags(struct iwl_mvm *mvm, + struct iwl_mvm_vif *mvmvif, + __le32 *filter_flags, + int accept_probe_req_flag, + int accept_beacon_flag); +int iwl_mvm_get_mac_type(struct ieee80211_vif *vif); +__le32 iwl_mvm_mac_ctxt_cmd_p2p_sta_get_oppps_ctwin(struct iwl_mvm *mvm, + struct ieee80211_vif *vif); +__le32 iwl_mvm_mac_ctxt_cmd_sta_get_twt_policy(struct iwl_mvm *mvm, + struct ieee80211_vif *vif); +int iwl_mvm_mld_mac_ctxt_add(struct iwl_mvm *mvm, struct ieee80211_vif *vif); +int iwl_mvm_mld_mac_ctxt_changed(struct iwl_mvm *mvm, struct ieee80211_vif *vif, + bool force_assoc_off); +int iwl_mvm_mld_mac_ctxt_remove(struct iwl_mvm *mvm, struct ieee80211_vif *vif); int iwl_mvm_mac_ctxt_init(struct iwl_mvm *mvm, struct ieee80211_vif *vif); int iwl_mvm_mac_ctxt_add(struct iwl_mvm *mvm, struct ieee80211_vif *vif); int iwl_mvm_mac_ctxt_changed(struct iwl_mvm *mvm, struct ieee80211_vif *vif, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index f4e9446d9dc2d..69c390ff27453 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -521,6 +521,7 @@ static const struct iwl_hcmd_names iwl_mvm_system_names[] = { static const struct iwl_hcmd_names iwl_mvm_mac_conf_names[] = { HCMD_NAME(CHANNEL_SWITCH_TIME_EVENT_CMD), HCMD_NAME(SESSION_PROTECTION_CMD), + HCMD_NAME(MAC_CONFIG_CMD), HCMD_NAME(SESSION_PROTECTION_NOTIF), HCMD_NAME(CHANNEL_SWITCH_START_NOTIF), }; -- GitLab From 55eb1c5fa4b260491d8be3299d4546d0b34465f4 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Tue, 14 Mar 2023 19:49:19 +0200 Subject: [PATCH 0458/2078] wifi: iwlwifi: mvm: add support for the new LINK command As a part of the new MLD FW API changes, we have a new LINK command to add/remove/configure a link. Add structures and enum definitions, along with the functions that sends this command (i.e. add, remove and change mac ctxt). These functions will be in used in the next patches. 
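For illustration only, a minimal sketch (not part of this patch) of the intended
add/activate/remove sequence using the helpers introduced here, assuming the vif
already has a PHY context bound (mvmvif->phy_ctxt set) and "mvm.h" is included;
the function name is hypothetical and the real call sites are added in later
patches of this series:

	/* Illustrative sketch only */
	static int example_bring_up_link(struct iwl_mvm *mvm,
					 struct ieee80211_vif *vif)
	{
		int ret;

		/* ADD: allocate the link in FW, keyed by the PHY context id */
		ret = iwl_mvm_add_link(mvm, vif);
		if (ret)
			return ret;

		/* MODIFY: mark the link active and refresh its basic rates */
		ret = iwl_mvm_link_changed(mvm, vif,
					   LINK_CONTEXT_MODIFY_ACTIVE |
					   LINK_CONTEXT_MODIFY_RATES_INFO,
					   true);
		if (ret)
			iwl_mvm_remove_link(mvm, vif);

		return ret;
	}

On teardown the order is reversed: deactivate the link first with
iwl_mvm_link_changed(..., LINK_CONTEXT_MODIFY_ACTIVE, false) and only then call
iwl_mvm_remove_link(), as the P2P_DEVICE handling added to the MLD mac80211
callbacks later in this series does.
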
Signed-off-by: Miri Korenblit Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230314194113.d7808329effb.I13bea2db206b78540bc866bc3ab755ad5be78c53@changeid Signed-off-by: Johannes Berg --- .../wireless/intel/iwlwifi/fw/api/mac-cfg.h | 179 ++++++++++++++++++ .../net/wireless/intel/iwlwifi/mvm/Makefile | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/link.c | 175 +++++++++++++++++ .../net/wireless/intel/iwlwifi/mvm/mac-ctxt.c | 26 ++- .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 7 +- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 20 ++ drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 1 + 7 files changed, 390 insertions(+), 20 deletions(-) create mode 100644 drivers/net/wireless/intel/iwlwifi/mvm/link.c diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h b/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h index f12b2a25cd3d2..301f34f135238 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h @@ -37,6 +37,10 @@ enum iwl_mac_conf_subcmd_ids { * @MAC_CONFIG_CMD: &struct iwl_mac_config_cmd */ MAC_CONFIG_CMD = 0x8, + /** + * @LINK_CONFIG_CMD: &struct iwl_link_config_cmd + */ + LINK_CONFIG_CMD = 0x9, /** * @SESSION_PROTECTION_NOTIF: &struct iwl_mvm_session_prot_notif */ @@ -287,4 +291,179 @@ struct iwl_mac_config_cmd { }; } __packed; /* MAC_CONTEXT_CONFIG_CMD_API_S_VER_1 */ +/** + * enum iwl_link_ctx_modify_flags - indicate to the fw what fields are being + * modified in &iwl_link_ctx_cfg_cmd + * + * @LINK_CONTEXT_MODIFY_ACTIVE: covers iwl_link_ctx_cfg_cmd::active + * @LINK_CONTEXT_MODIFY_RATES_INFO: covers iwl_link_ctx_cfg_cmd::cck_rates, + * iwl_link_ctx_cfg_cmd::ofdm_rates, + * iwl_link_ctx_cfg_cmd::cck_short_preamble, + * iwl_link_ctx_cfg_cmd::short_slot + * @LINK_CONTEXT_MODIFY_PROTECT_FLAGS: covers + * iwl_link_ctx_cfg_cmd::protection_flags + * @LINK_CONTEXT_MODIFY_QOS_PARAMS: covers iwl_link_ctx_cfg_cmd::qos_flags, + * iwl_link_ctx_cfg_cmd::ac, + * @LINK_CONTEXT_MODIFY_BEACON_TIMING: covers iwl_link_ctx_cfg_cmd::bi, + * iwl_link_ctx_cfg_cmd::dtim_interval, + * iwl_link_ctx_cfg_cmd::dtim_time, + * iwl_link_ctx_cfg_cmd::dtim_tsf, + * iwl_link_ctx_cfg_cmd::assoc_beacon_arrive_time. + * This flag can be set only once after assoc. + * @LINK_CONTEXT_MODIFY_HE_PARAMS: covers + * iwl_link_ctx_cfg_cmd::htc_trig_based_pkt_ext + * iwl_link_ctx_cfg_cmd::rand_alloc_ecwmin, + * iwl_link_ctx_cfg_cmd::rand_alloc_ecwmax, + * iwl_link_ctx_cfg_cmd::trig_based_txf, + * iwl_link_ctx_cfg_cmd::bss_color, + * iwl_link_ctx_cfg_cmd::ndp_fdbk_buff_th_exp, + * iwl_link_ctx_cfg_cmd::ref_bssid_addr + * iwl_link_ctx_cfg_cmd::bssid_index, + * iwl_link_ctx_cfg_cmd::frame_time_rts_th. + * This flag can be set any time. + * @LINK_CONTEXT_MODIFY_BSS_COLOR_DISABLE: covers + * iwl_link_ctx_cfg_cmd::bss_color_disable + * @LINK_CONTEXT_MODIFY_EHT_PARAMS: covers iwl_link_ctx_cfg_cmd::puncture_mask. + * This flag can be set only if the MAC that this link relates to has + * eht_support set to true. 
+ * @LINK_CONTEXT_MODIFY_ALL: set all above flags + */ +enum iwl_link_ctx_modify_flags { + LINK_CONTEXT_MODIFY_ACTIVE = BIT(0), + LINK_CONTEXT_MODIFY_RATES_INFO = BIT(1), + LINK_CONTEXT_MODIFY_PROTECT_FLAGS = BIT(2), + LINK_CONTEXT_MODIFY_QOS_PARAMS = BIT(3), + LINK_CONTEXT_MODIFY_BEACON_TIMING = BIT(4), + LINK_CONTEXT_MODIFY_HE_PARAMS = BIT(5), + LINK_CONTEXT_MODIFY_BSS_COLOR_DISABLE = BIT(6), + LINK_CONTEXT_MODIFY_EHT_PARAMS = BIT(7), + LINK_CONTEXT_MODIFY_ALL = 0xff, +}; /* LINK_CONTEXT_MODIFY_MASK_E_VER_1 */ + +/** + * enum iwl_link_ctx_protection_flags - link protection flags + * @LINK_PROT_FLG_TGG_PROTECT: 11g protection when transmitting OFDM frames, + * this will require CCK RTS/CTS2self. + * RTS/CTS will protect full burst time. + * @LINK_PROT_FLG_HT_PROT: enable HT protection + * @LINK_PROT_FLG_FAT_PROT: protect 40 MHz transmissions + * @LINK_PROT_FLG_SELF_CTS_EN: allow CTS2self + */ +enum iwl_link_ctx_protection_flags { + LINK_PROT_FLG_TGG_PROTECT = BIT(0), + LINK_PROT_FLG_HT_PROT = BIT(1), + LINK_PROT_FLG_FAT_PROT = BIT(2), + LINK_PROT_FLG_SELF_CTS_EN = BIT(3), +}; /* LINK_PROTECT_FLAGS_E_VER_1 */ + +/** + * enum iwl_link_ctx_flags - link context flags + * + * @LINK_FLG_BSS_COLOR_DIS: BSS color disable, don't use the BSS + * color for RX filter but use MAC header + * enabled AGG, i.e. both BACK and non-BACK frames in a single AGG + * @LINK_FLG_MU_EDCA_CW: indicates that there is an element of MU EDCA + * parameter set, i.e. the backoff counters for trig-based ACs + * @LINK_FLG_RU_2MHZ_BLOCK: indicates that 26-tone RU OFDMA transmission are + * not allowed (as there are OBSS that might classify such transmissions as + * radar pulses). + */ +enum iwl_link_ctx_flags { + LINK_FLG_BSS_COLOR_DIS = BIT(0), + LINK_FLG_MU_EDCA_CW = BIT(1), + LINK_FLG_RU_2MHZ_BLOCK = BIT(2), +}; /* LINK_CONTEXT_FLAG_E_VER_1 */ + +/** + * struct iwl_link_config_cmd - command structure to configure the LINK context + * in MLD API + * ( LINK_CONFIG_CMD =0x9 ) + * + * @action: action to perform, one of FW_CTXT_ACTION_* + * @link_id: the id of the link that this cmd configures + * @mac_id: interface ID. Relevant only if action is FW_CTXT_ACTION_ADD + * @phy_id: PHY index. Can be changed only if the link was inactive + * (and stays inactive). If the link is active (or becomes active), + * this field is ignored. + * @local_link_addr: the links MAC address. Can be changed only if the link was + * inactive (and stays inactive). If the link is active + * (or becomes active), this field is ignored. + * @reserved_for_local_link_addr: reserved + * @modify_mask: from &enum iwl_link_ctx_modify_flags, selects what to change. + * Relevant only if action is FW_CTXT_ACTION_MODIFY + * @active: indicates whether the link is active or not + * @listen_lmac: indicates whether the link should be allocated on the Listen + * Lmac or on the Main Lmac. Cannot be changed on an active Link. + * Relevant only for eSR. 
+ * @cck_rates: basic rates available for CCK + * @ofdm_rates: basic rates available for OFDM + * @cck_short_preamble: 1 for enabling short preamble, 0 otherwise + * @short_slot: 1 for enabling short slots, 0 otherwise + * @protection_flags: combination of &enum iwl_link_ctx_protection_flags + * @qos_flags: from &enum iwl_mac_qos_flags + * @ac: one iwl_mac_qos configuration for each AC + * @htc_trig_based_pkt_ext: default PE in 4us units + * @rand_alloc_ecwmin: random CWmin = 2**ECWmin-1 + * @rand_alloc_ecwmax: random CWmax = 2**ECWmax-1 + * @ndp_fdbk_buff_th_exp: set exponent for the NDP feedback buffered threshold + * @trig_based_txf: MU EDCA Parameter set for the trigger based traffic queues + * @dtim_time: DTIM arrival time in system time + * @dtim_tsf: DTIM arrival time in TSF + * @assoc_beacon_arrive_time: TSF of first beacon after association + * @bi: beacon interval in TU, applicable only when associated + * @dtim_interval: DTIM interval in TU. + * Relevant only for GO, otherwise this is offloaded. + * @beacon_template: beacon template ID. For GO only + * @puncture_mask: puncture mask for EHT + * @frame_time_rts_th: HE duration RTS threshold, in units of 32us + * @flags: a combination from &enum iwl_link_ctx_flags + * @flags_mask: what of %flags have changed. Also &enum iwl_link_ctx_flags + * Below fields are for multi-bssid: + * @ref_bssid_addr: reference BSSID used by the AP + * @reserved_for_ref_bssid_addr: reserved + * @bssid_index: index of the associated VAP + * @bss_color: 11ax AP ID that is used in the HE SIG-A to mark inter BSS frame + * @reserved: alignment + */ +struct iwl_link_config_cmd { + __le32 action; + __le32 link_id; + __le32 mac_id; + __le32 phy_id; + u8 local_link_addr[6]; + __le16 reserved_for_local_link_addr; + __le32 modify_mask; + __le32 active; + __le32 listen_lmac; + __le32 cck_rates; + __le32 ofdm_rates; + __le32 cck_short_preamble; + __le32 short_slot; + __le32 protection_flags; + /* MAC_QOS_PARAM_API_S_VER_1 */ + __le32 qos_flags; + struct iwl_ac_qos ac[AC_NUM + 1]; + u8 htc_trig_based_pkt_ext; + u8 rand_alloc_ecwmin; + u8 rand_alloc_ecwmax; + u8 ndp_fdbk_buff_th_exp; + struct iwl_he_backoff_conf trig_based_txf[AC_NUM]; + __le32 dtim_time; + __le64 dtim_tsf; + __le32 assoc_beacon_arrive_time; + __le32 bi; + __le32 dtim_interval; + __le32 beacon_template; + __le16 puncture_mask; + __le16 frame_time_rts_th; + __le32 flags; + __le32 flags_mask; + /* The below fields are for multi-bssid */ + u8 ref_bssid_addr[6]; + __le16 reserved_for_ref_bssid_addr; + u8 bssid_index; + u8 bss_color; + u8 reserved[2]; +} __packed; /* LINK_CONTEXT_CONFIG_CMD_API_S_VER_1 */ + #endif /* __iwl_fw_api_mac_cfg_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/Makefile b/drivers/net/wireless/intel/iwlwifi/mvm/Makefile index d71f0a99b7c45..ec6346c6c8e45 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/Makefile +++ b/drivers/net/wireless/intel/iwlwifi/mvm/Makefile @@ -7,7 +7,7 @@ iwlmvm-y += power.o coex.o iwlmvm-y += tt.o offloading.o tdls.o iwlmvm-y += ftm-responder.o ftm-initiator.o iwlmvm-y += rfi.o -iwlmvm-y += mld-key.o mld-mac.o +iwlmvm-y += mld-key.o mld-mac.o link.o iwlmvm-$(CONFIG_IWLWIFI_DEBUGFS) += debugfs.o debugfs-vif.o iwlmvm-$(CONFIG_IWLWIFI_LEDS) += led.o iwlmvm-$(CONFIG_PM) += d3.o diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/link.c b/drivers/net/wireless/intel/iwlwifi/mvm/link.c new file mode 100644 index 0000000000000..2688cb49c9512 --- /dev/null +++ b/drivers/net/wireless/intel/iwlwifi/mvm/link.c @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: 
GPL-2.0 OR BSD-3-Clause +/* + * Copyright (C) 2022 Intel Corporation + */ +#include "mvm.h" + +static int iwl_mvm_link_cmd_send(struct iwl_mvm *mvm, + struct iwl_link_config_cmd *cmd, + enum iwl_ctxt_action action) +{ + int ret; + + cmd->action = cpu_to_le32(action); + ret = iwl_mvm_send_cmd_pdu(mvm, + WIDE_ID(MAC_CONF_GROUP, LINK_CONFIG_CMD), 0, + sizeof(*cmd), cmd); + if (ret) + IWL_ERR(mvm, "Failed to send LINK_CONFIG_CMD (action:%d): %d\n", + action, ret); + return ret; +} + +int iwl_mvm_add_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif) +{ + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + struct iwl_link_config_cmd cmd = {}; + + if (WARN_ON_ONCE(!mvmvif->phy_ctxt)) + return -EINVAL; + + /* Update SF - Disable if needed. if this fails, SF might still be on + * while many macs are bound, which is forbidden - so fail the binding. + */ + if (iwl_mvm_sf_update(mvm, vif, false)) + return -EINVAL; + + cmd.link_id = cpu_to_le32(mvmvif->phy_ctxt->id); + cmd.mac_id = cpu_to_le32(mvmvif->id); + cmd.phy_id = cpu_to_le32(mvmvif->phy_ctxt->id); + + memcpy(cmd.local_link_addr, vif->addr, ETH_ALEN); + + return iwl_mvm_link_cmd_send(mvm, &cmd, FW_CTXT_ACTION_ADD); +} + +int iwl_mvm_link_changed(struct iwl_mvm *mvm, struct ieee80211_vif *vif, + u32 changes, bool active) +{ + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + struct iwl_mvm_phy_ctxt *phyctxt = mvmvif->phy_ctxt; + struct iwl_link_config_cmd cmd = {}; + u32 ht_flag, flags = 0, flags_mask = 0; + + if (!phyctxt) + return -EINVAL; + + cmd.link_id = cpu_to_le32(phyctxt->id); + + /* The phy_id, link address and listen_lmac can be modified only until + * the link becomes active, otherwise they will be ignored. + */ + cmd.phy_id = cpu_to_le32(phyctxt->id); + cmd.mac_id = cpu_to_le32(mvmvif->id); + + memcpy(cmd.local_link_addr, vif->addr, ETH_ALEN); + + cmd.active = cpu_to_le32(active); + + /* TODO: set a value to cmd.listen_lmac when system requiremens + * will define it + */ + + iwl_mvm_set_fw_basic_rates(mvm, vif, &cmd.cck_rates, &cmd.ofdm_rates); + + cmd.cck_short_preamble = cpu_to_le32(vif->bss_conf.use_short_preamble); + cmd.short_slot = cpu_to_le32(vif->bss_conf.use_short_slot); + + /* The fw does not distinguish between ht and fat */ + ht_flag = LINK_PROT_FLG_HT_PROT | LINK_PROT_FLG_FAT_PROT; + iwl_mvm_set_fw_protection_flags(mvm, vif, &cmd.protection_flags, + ht_flag, LINK_PROT_FLG_TGG_PROTECT); + + iwl_mvm_set_fw_qos_params(mvm, vif, &cmd.ac[0], &cmd.qos_flags); + + /* We need the dtim_period to set the MAC as associated */ + if (vif->cfg.assoc && vif->bss_conf.dtim_period) + iwl_mvm_set_fw_dtim_tbtt(mvm, vif, &cmd.dtim_tsf, + &cmd.dtim_time, + &cmd.assoc_beacon_arrive_time); + else + changes &= ~LINK_CONTEXT_MODIFY_BEACON_TIMING; + + cmd.bi = cpu_to_le32(vif->bss_conf.beacon_int); + cmd.dtim_interval = cpu_to_le32(vif->bss_conf.beacon_int * + vif->bss_conf.dtim_period); + + /* TODO: Assumes that the beacon id == mac context id */ + cmd.beacon_template = cpu_to_le32(mvmvif->id); + + if (!vif->bss_conf.he_support || iwlwifi_mod_params.disable_11ax || + !vif->cfg.assoc) { + changes &= ~LINK_CONTEXT_MODIFY_HE_PARAMS; + goto send_cmd; + } + + cmd.htc_trig_based_pkt_ext = vif->bss_conf.htc_trig_based_pkt_ext; + + if (vif->bss_conf.uora_exists) { + cmd.rand_alloc_ecwmin = + vif->bss_conf.uora_ocw_range & 0x7; + cmd.rand_alloc_ecwmax = + (vif->bss_conf.uora_ocw_range >> 3) & 0x7; + } + + /* TODO how to set ndp_fdbk_buff_th_exp? 
*/ + + if (iwl_mvm_set_fw_mu_edca_params(mvm, mvmvif, + &cmd.trig_based_txf[0])) { + flags |= LINK_FLG_MU_EDCA_CW; + flags_mask |= LINK_FLG_MU_EDCA_CW; + } + + if (vif->bss_conf.eht_puncturing && !iwlwifi_mod_params.disable_11be) + cmd.puncture_mask = cpu_to_le16(vif->bss_conf.eht_puncturing); + else + /* This flag can be set only if the MAC has eht support */ + changes &= ~LINK_CONTEXT_MODIFY_EHT_PARAMS; + + cmd.bss_color = vif->bss_conf.he_bss_color.color; + + if (!vif->bss_conf.he_bss_color.enabled) { + flags |= LINK_FLG_BSS_COLOR_DIS; + flags_mask |= LINK_FLG_BSS_COLOR_DIS; + } + + cmd.frame_time_rts_th = cpu_to_le16(vif->bss_conf.frame_time_rts_th); + + /* Block 26-tone RU OFDMA transmissions */ + if (mvmvif->he_ru_2mhz_block) { + flags |= LINK_FLG_RU_2MHZ_BLOCK; + flags_mask |= LINK_FLG_RU_2MHZ_BLOCK; + } + + if (vif->bss_conf.nontransmitted) { + ether_addr_copy(cmd.ref_bssid_addr, + vif->bss_conf.transmitter_bssid); + cmd.bssid_index = vif->bss_conf.bssid_index; + } + +send_cmd: + cmd.modify_mask = cpu_to_le32(changes); + cmd.flags = cpu_to_le32(flags); + cmd.flags_mask = cpu_to_le32(flags_mask); + + return iwl_mvm_link_cmd_send(mvm, &cmd, FW_CTXT_ACTION_MODIFY); +} + +int iwl_mvm_remove_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif) +{ + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + struct iwl_link_config_cmd cmd = {}; + int ret; + + if (WARN_ON_ONCE(!mvmvif->phy_ctxt)) + return -EINVAL; + + cmd.link_id = cpu_to_le32(mvmvif->phy_ctxt->id); + ret = iwl_mvm_link_cmd_send(mvm, &cmd, FW_CTXT_ACTION_REMOVE); + + if (!ret) + if (iwl_mvm_sf_update(mvm, vif, true)) + IWL_ERR(mvm, "Failed to update SF state\n"); + + return ret; +} diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index b21327ec44016..4d7ad91c53d4a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -396,9 +396,8 @@ static void iwl_mvm_ack_rates(struct iwl_mvm *mvm, *ofdm_rates = ofdm; } -static void iwl_mvm_set_fw_basic_rates(struct iwl_mvm *mvm, - struct ieee80211_vif *vif, - __le32 *cck_rates, __le32 *ofdm_rates) +void iwl_mvm_set_fw_basic_rates(struct iwl_mvm *mvm, struct ieee80211_vif *vif, + __le32 *cck_rates, __le32 *ofdm_rates) { struct ieee80211_chanctx_conf *chanctx; u8 cck_ack_rates, ofdm_ack_rates; @@ -414,11 +413,10 @@ static void iwl_mvm_set_fw_basic_rates(struct iwl_mvm *mvm, *ofdm_rates = cpu_to_le32((u32)ofdm_ack_rates); } -static void iwl_mvm_set_fw_protection_flags(struct iwl_mvm *mvm, - struct ieee80211_vif *vif, - __le32 *protection_flags, - u32 ht_flag, - u32 tgg_flag) +void iwl_mvm_set_fw_protection_flags(struct iwl_mvm *mvm, + struct ieee80211_vif *vif, + __le32 *protection_flags, u32 ht_flag, + u32 tgg_flag) { /* for both sta and ap, ht_operation_mode hold the protection_mode */ u8 protection_mode = vif->bss_conf.ht_operation_mode & @@ -460,9 +458,8 @@ static void iwl_mvm_set_fw_protection_flags(struct iwl_mvm *mvm, } } -static void iwl_mvm_set_fw_qos_params(struct iwl_mvm *mvm, - struct ieee80211_vif *vif, - struct iwl_ac_qos *ac, __le32 *qos_flags) +void iwl_mvm_set_fw_qos_params(struct iwl_mvm *mvm, struct ieee80211_vif *vif, + struct iwl_ac_qos *ac, __le32 *qos_flags) { struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); int i; @@ -572,10 +569,9 @@ static int iwl_mvm_mac_ctxt_send_cmd(struct iwl_mvm *mvm, return ret; } -static void iwl_mvm_set_fw_dtim_tbtt(struct iwl_mvm *mvm, - struct ieee80211_vif *vif, - __le64 *dtim_tsf, __le32 
*dtim_time, - __le32 *assoc_beacon_arrive_time) +void iwl_mvm_set_fw_dtim_tbtt(struct iwl_mvm *mvm, struct ieee80211_vif *vif, + __le64 *dtim_tsf, __le32 *dtim_time, + __le32 *assoc_beacon_arrive_time) { u32 dtim_offs; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 210b134c735fb..cf08cb834cc49 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -2000,10 +2000,9 @@ static void iwl_mvm_get_optimal_ppe_info(struct iwl_he_pkt_ext_v2 *pkt_ext, * This function sets the MU EDCA parameters ans returns whether MU EDCA * is enabled or not */ -static bool iwl_mvm_set_fw_mu_edca_params(struct iwl_mvm *mvm, - struct iwl_mvm_vif *mvmvif, - struct iwl_he_backoff_conf - *trig_based_txf) +bool iwl_mvm_set_fw_mu_edca_params(struct iwl_mvm *mvm, + struct iwl_mvm_vif *mvmvif, + struct iwl_he_backoff_conf *trig_based_txf) { int i; /* Mark MU EDCA as enabled, unless none detected on some AC */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 1287207a34b3b..4663cb7a0b102 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -1640,6 +1640,20 @@ u8 iwl_mvm_get_ctrl_pos(struct cfg80211_chan_def *chandef); /* MAC (virtual interface) programming */ +void iwl_mvm_set_fw_basic_rates(struct iwl_mvm *mvm, struct ieee80211_vif *vif, + __le32 *cck_rates, __le32 *ofdm_rates); +void iwl_mvm_set_fw_protection_flags(struct iwl_mvm *mvm, + struct ieee80211_vif *vif, + __le32 *protection_flags, u32 ht_flag, + u32 tgg_flag); +void iwl_mvm_set_fw_qos_params(struct iwl_mvm *mvm, struct ieee80211_vif *vif, + struct iwl_ac_qos *ac, __le32 *qos_flags); +bool iwl_mvm_set_fw_mu_edca_params(struct iwl_mvm *mvm, + struct iwl_mvm_vif *mvmvif, + struct iwl_he_backoff_conf *trig_based_txf); +void iwl_mvm_set_fw_dtim_tbtt(struct iwl_mvm *mvm, struct ieee80211_vif *vif, + __le64 *dtim_tsf, __le32 *dtim_time, + __le32 *assoc_beacon_arrive_time); __le32 iwl_mac_ctxt_p2p_dev_has_extended_disc(struct iwl_mvm *mvm, struct ieee80211_vif *vif); void iwl_mvm_mac_ctxt_cmd_ap_set_filter_flags(struct iwl_mvm *mvm, @@ -1702,6 +1716,12 @@ void iwl_mvm_channel_switch_error_notif(struct iwl_mvm *mvm, int iwl_mvm_binding_add_vif(struct iwl_mvm *mvm, struct ieee80211_vif *vif); int iwl_mvm_binding_remove_vif(struct iwl_mvm *mvm, struct ieee80211_vif *vif); +/* Links */ +int iwl_mvm_add_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif); +int iwl_mvm_link_changed(struct iwl_mvm *mvm, struct ieee80211_vif *vif, + u32 changes, bool active); +int iwl_mvm_remove_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif); + /* Quota management */ static inline size_t iwl_mvm_quota_cmd_size(struct iwl_mvm *mvm) { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 69c390ff27453..c3767f62026a3 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -522,6 +522,7 @@ static const struct iwl_hcmd_names iwl_mvm_mac_conf_names[] = { HCMD_NAME(CHANNEL_SWITCH_TIME_EVENT_CMD), HCMD_NAME(SESSION_PROTECTION_CMD), HCMD_NAME(MAC_CONFIG_CMD), + HCMD_NAME(LINK_CONFIG_CMD), HCMD_NAME(SESSION_PROTECTION_NOTIF), HCMD_NAME(CHANNEL_SWITCH_START_NOTIF), }; -- GitLab From 006c152ac9e56ac7871efa995854c3ff8cf6915a Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Tue, 14 Mar 2023 19:49:20 +0200 Subject: [PATCH 0459/2078] 
wifi: iwlwifi: mvm: add support for the new STA related commands As a part of the new MLD FW API changes, we have new commands for STA related operations (add/remove/aux/disable tx). Add structures and enum definitions, along with part of the functions that sends this commands. This functions will be in used and more will be added in the next patches. Signed-off-by: Miri Korenblit Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230314194113.132873ce015c.I7b12a77e5be066730762e6ceeeaa7190293c3df1@changeid Signed-off-by: Johannes Berg --- .../wireless/intel/iwlwifi/fw/api/mac-cfg.h | 128 ++++++++ .../net/wireless/intel/iwlwifi/mvm/Makefile | 2 +- .../net/wireless/intel/iwlwifi/mvm/mld-sta.c | 281 ++++++++++++++++++ drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 4 + drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 8 +- drivers/net/wireless/intel/iwlwifi/mvm/sta.h | 16 +- 6 files changed, 431 insertions(+), 8 deletions(-) create mode 100644 drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h b/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h index 301f34f135238..a111e7366d49d 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h @@ -41,6 +41,22 @@ enum iwl_mac_conf_subcmd_ids { * @LINK_CONFIG_CMD: &struct iwl_link_config_cmd */ LINK_CONFIG_CMD = 0x9, + /** + * @STA_CONFIG_CMD: &struct iwl_mvm_sta_cfg_cmd + */ + STA_CONFIG_CMD = 0xA, + /** + * @AUX_STA_CMD: &struct iwl_mvm_aux_sta_cmd + */ + AUX_STA_CMD = 0xB, + /** + * @STA_REMOVE_CMD: &struct iwl_mvm_remove_sta_cmd + */ + STA_REMOVE_CMD = 0xC, + /** + * @STA_DISABLE_TX_CMD: &struct iwl_mvm_sta_disable_tx_cmd + */ + STA_DISABLE_TX_CMD = 0xD, /** * @SESSION_PROTECTION_NOTIF: &struct iwl_mvm_session_prot_notif */ @@ -466,4 +482,116 @@ struct iwl_link_config_cmd { u8 reserved[2]; } __packed; /* LINK_CONTEXT_CONFIG_CMD_API_S_VER_1 */ +/** + * enum iwl_fw_sta_type - FW station types + * @STATION_TYPE_PEER: represents a peer - AP in BSS, a TDLS sta, a client in + * P2P. + * @STATION_TYPE_BCAST_MGMT: The station used to send beacons and + * probe responses. Also used for traffic injection in sniffer mode + * @STATION_TYPE_MCAST: the station used for BCAST / MCAST in GO. Will be + * suspended / resumed at the right timing depending on the clients' + * power save state and the DTIM timing + */ +enum iwl_fw_sta_type { + STATION_TYPE_PEER, + STATION_TYPE_BCAST_MGMT, + STATION_TYPE_MCAST, +}; /* STATION_TYPE_E_VER_1 */ + +/** + * struct iwl_mvm_sta_cfg_cmd - cmd structure to add a peer sta to the uCode's + * station table + * ( STA_CONFIG_CMD = 0xA ) + * + * @sta_id: index of station in uCode's station table + * @link_id: the id of the link that is used to communicate with this sta + * @peer_mld_address: the peers mld address + * @reserved_for_peer_mld_address: reserved + * @peer_link_address: the address of the link that is used to communicate + * with this sta + * @reserved_for_peer_link_address: reserved + * @station_type: type of this station. See &enum iwl_fw_sta_type + * @assoc_id: for GO only + * @beamform_flags: beam forming controls + * @mfp: indicates whether the STA uses management frame protection or not. + * @mimo: indicates whether the sta uses mimo or not + * @mimo_protection: indicates whether the sta uses mimo protection or not + * @ack_enabled: indicates that the AP supports receiving ACK- + * enabled AGG, i.e. 
both BACK and non-BACK frames in a single AGG + * @trig_rnd_alloc: indicates that trigger based random allocation + * is enabled according to UORA element existence + * @tx_ampdu_spacing: minimum A-MPDU spacing: + * 4 - 2us density, 5 - 4us density, 6 - 8us density, 7 - 16us density + * @tx_ampdu_max_size: maximum A-MPDU length: 0 - 8K, 1 - 16K, 2 - 32K, + * 3 - 64K, 4 - 128K, 5 - 256K, 6 - 512K, 7 - 1024K. + * @sp_length: the size of the SP in actual number of frames + * @uapsd_acs: 4 LS bits are trigger enabled ACs, 4 MS bits are the deliver + * enabled ACs. + * @pkt_ext: optional, exists according to PPE-present bit in the HE/EHT-PHY + * capa + * @htc_flags: which features are supported in HTC + */ +struct iwl_mvm_sta_cfg_cmd { + __le32 sta_id; + __le32 link_id; + u8 peer_mld_address[ETH_ALEN]; + __le16 reserved_for_peer_mld_address; + u8 peer_link_address[ETH_ALEN]; + __le16 reserved_for_peer_link_address; + __le32 station_type; + __le32 assoc_id; + __le32 beamform_flags; + __le32 mfp; + __le32 mimo; + __le32 mimo_protection; + __le32 ack_enabled; + __le32 trig_rnd_alloc; + __le32 tx_ampdu_spacing; + __le32 tx_ampdu_max_size; + __le32 sp_length; + __le32 uapsd_acs; + struct iwl_he_pkt_ext_v2 pkt_ext; + __le32 htc_flags; +} __packed; /* STA_CMD_API_S_VER_1 */ + +/** + * struct iwl_mvm_aux_sta_cmd - command for AUX STA configuration + * ( AUX_STA_CMD = 0xB ) + * + * @sta_id: index of aux sta to configure + * @lmac_id: ? + * @mac_addr: mac addr of the auxilary sta + * @reserved_for_mac_addr: reserved + */ +struct iwl_mvm_aux_sta_cmd { + __le32 sta_id; + __le32 lmac_id; + u8 mac_addr[ETH_ALEN]; + __le16 reserved_for_mac_addr; + +} __packed; /* AUX_STA_CMD_API_S_VER_1 */ + +/** + * struct iwl_mvm_remove_sta_cmd - a cmd structure to remove a sta added by + * STA_CONFIG_CMD or AUX_STA_CONFIG_CMD + * ( STA_REMOVE_CMD = 0xC ) + * + * @sta_id: index of station to remove + */ +struct iwl_mvm_remove_sta_cmd { + __le32 sta_id; +} __packed; /* REMOVE_STA_API_S_VER_1 */ + +/** + * struct iwl_mvm_sta_disable_tx_cmd - disable / re-enable tx to a sta + * ( STA_DISABLE_TX_CMD = 0xD ) + * + * @sta_id: index of the station to disable tx to + * @disable: indicates if to disable or re-enable tx + */ +struct iwl_mvm_sta_disable_tx_cmd { + __le32 sta_id; + __le32 disable; +} __packed; /* STA_DISABLE_TX_API_S_VER_1 */ + #endif /* __iwl_fw_api_mac_cfg_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/Makefile b/drivers/net/wireless/intel/iwlwifi/mvm/Makefile index ec6346c6c8e45..0e9b5381e2659 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/Makefile +++ b/drivers/net/wireless/intel/iwlwifi/mvm/Makefile @@ -7,7 +7,7 @@ iwlmvm-y += power.o coex.o iwlmvm-y += tt.o offloading.o tdls.o iwlmvm-y += ftm-responder.o ftm-initiator.o iwlmvm-y += rfi.o -iwlmvm-y += mld-key.o mld-mac.o link.o +iwlmvm-y += mld-key.o mld-mac.o link.o mld-sta.o iwlmvm-$(CONFIG_IWLWIFI_DEBUGFS) += debugfs.o debugfs-vif.o iwlmvm-$(CONFIG_IWLWIFI_LEDS) += led.o iwlmvm-$(CONFIG_PM) += d3.o diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c new file mode 100644 index 0000000000000..cef4fc441afe6 --- /dev/null +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c @@ -0,0 +1,281 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +/* + * Copyright (C) 2022 Intel Corporation + */ +#include "mvm.h" + +static int iwl_mvm_mld_send_sta_cmd(struct iwl_mvm *mvm, + struct iwl_mvm_sta_cfg_cmd *cmd) +{ + int ret = iwl_mvm_send_cmd_pdu(mvm, + WIDE_ID(MAC_CONF_GROUP, STA_CONFIG_CMD), 
+ 0, sizeof(*cmd), cmd); + if (ret) + IWL_ERR(mvm, "STA_CONFIG_CMD send failed, ret=0x%x\n", ret); + return ret; +} + +/* + * Add an internal station to the FW table + */ +static int iwl_mvm_mld_add_int_sta_to_fw(struct iwl_mvm *mvm, + struct iwl_mvm_int_sta *sta, + const u8 *addr, + u16 phy_id) +{ + struct iwl_mvm_sta_cfg_cmd cmd; + + lockdep_assert_held(&mvm->mutex); + + memset(&cmd, 0, sizeof(cmd)); + cmd.sta_id = cpu_to_le32((u8)sta->sta_id); + + cmd.link_id = cpu_to_le32(phy_id); + + cmd.station_type = cpu_to_le32(sta->type); + + if (addr) { + memcpy(cmd.peer_mld_address, addr, ETH_ALEN); + memcpy(cmd.peer_link_address, addr, ETH_ALEN); + } + + return iwl_mvm_mld_send_sta_cmd(mvm, &cmd); +} + +/* + * Remove a station from the FW table. Before sending the command to remove + * the station validate that the station is indeed known to the driver (sanity + * only). + */ +static int iwl_mvm_mld_rm_sta_from_fw(struct iwl_mvm *mvm, u32 sta_id) +{ + struct ieee80211_sta *sta; + struct iwl_mvm_remove_sta_cmd rm_sta_cmd = { + .sta_id = cpu_to_le32(sta_id), + }; + int ret; + + sta = rcu_dereference_protected(mvm->fw_id_to_mac_id[sta_id], + lockdep_is_held(&mvm->mutex)); + + /* Note: internal stations are marked as error values */ + if (!sta) { + IWL_ERR(mvm, "Invalid station id\n"); + return -EINVAL; + } + + ret = iwl_mvm_send_cmd_pdu(mvm, WIDE_ID(MAC_CONF_GROUP, STA_REMOVE_CMD), + 0, sizeof(rm_sta_cmd), &rm_sta_cmd); + if (ret) { + IWL_ERR(mvm, "Failed to remove station. Id=%d\n", sta_id); + return ret; + } + + return 0; +} + +/* + * Adds an internal sta to the FW table with its queues + */ +static int iwl_mvm_mld_add_int_sta_with_queue(struct iwl_mvm *mvm, + struct iwl_mvm_int_sta *sta, + const u8 *addr, int phy_id, + u16 *queue, u8 tid, + unsigned int *_wdg_timeout) +{ + int ret, txq; + unsigned int wdg_timeout = _wdg_timeout ? *_wdg_timeout : + mvm->trans->trans_cfg->base_params->wd_timeout; + + if (WARN_ON_ONCE(sta->sta_id == IWL_MVM_INVALID_STA)) + return -ENOSPC; + + ret = iwl_mvm_mld_add_int_sta_to_fw(mvm, sta, addr, phy_id); + if (ret) + return ret; + + /* + * For 22000 firmware and on we cannot add queue to a station unknown + * to firmware so enable queue here - after the station was added + */ + txq = iwl_mvm_tvqm_enable_txq(mvm, NULL, sta->sta_id, tid, + wdg_timeout); + if (txq < 0) { + iwl_mvm_mld_rm_sta_from_fw(mvm, sta->sta_id); + return txq; + } + *queue = txq; + + return 0; +} + +/* + * Adds a new int sta: allocate it in the driver, add it to the FW table, + * and add its queues. + */ +static int iwl_mvm_mld_add_int_sta(struct iwl_mvm *mvm, + struct iwl_mvm_int_sta *int_sta, u16 *queue, + enum nl80211_iftype iftype, + enum iwl_fw_sta_type sta_type, + int phy_id, const u8 *addr, u8 tid, + unsigned int *wdg_timeout) +{ + int ret; + + lockdep_assert_held(&mvm->mutex); + + /* qmask argument is not used in the new tx api, send a don't care */ + ret = iwl_mvm_allocate_int_sta(mvm, int_sta, 0, iftype, + sta_type); + if (ret) + return ret; + + ret = iwl_mvm_mld_add_int_sta_with_queue(mvm, int_sta, addr, phy_id, + queue, tid, wdg_timeout); + if (ret) { + iwl_mvm_dealloc_int_sta(mvm, int_sta); + return ret; + } + + return 0; +} + +/* Allocate a new station entry for the broadcast station to the given vif, + * and send it to the FW. + * Note that each P2P mac should have its own broadcast station. 
+ */ +int iwl_mvm_mld_add_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) +{ + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + struct iwl_mvm_int_sta *bsta = &mvmvif->bcast_sta; + static const u8 _baddr[] = {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}; + const u8 *baddr = _baddr; + unsigned int wdg_timeout = + iwl_mvm_get_wd_timeout(mvm, vif, false, false); + u16 *queue; + + lockdep_assert_held(&mvm->mutex); + + if (vif->type == NL80211_IFTYPE_ADHOC) + baddr = vif->bss_conf.bssid; + + if (vif->type == NL80211_IFTYPE_AP || + vif->type == NL80211_IFTYPE_ADHOC) { + queue = &mvm->probe_queue; + } else if (vif->type == NL80211_IFTYPE_P2P_DEVICE) { + queue = &mvm->p2p_dev_queue; + } else { + WARN(1, "Missing required TXQ for adding bcast STA\n"); + return -EINVAL; + } + + return iwl_mvm_mld_add_int_sta(mvm, bsta, queue, + ieee80211_vif_type_p2p(vif), + STATION_TYPE_BCAST_MGMT, + mvmvif->phy_ctxt->id, baddr, + IWL_MAX_TID_COUNT, &wdg_timeout); +} + +/* Allocate a new station entry for the sniffer station to the given vif, + * and send it to the FW. + */ +int iwl_mvm_mld_add_snif_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) +{ + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + + lockdep_assert_held(&mvm->mutex); + + return iwl_mvm_mld_add_int_sta(mvm, &mvm->snif_sta, &mvm->snif_queue, + vif->type, STATION_TYPE_BCAST_MGMT, + mvmvif->phy_ctxt->id, NULL, + IWL_MAX_TID_COUNT, NULL); +} + +static int iwl_mvm_mld_disable_txq(struct iwl_mvm *mvm, + struct ieee80211_sta *sta, + u16 *queueptr, u8 tid) +{ + struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); + int queue = *queueptr; + int ret = 0; + + if (mvm->sta_remove_requires_queue_remove) { + u32 cmd_id = WIDE_ID(DATA_PATH_GROUP, + SCD_QUEUE_CONFIG_CMD); + struct iwl_scd_queue_cfg_cmd remove_cmd = { + .operation = cpu_to_le32(IWL_SCD_QUEUE_REMOVE), + .u.remove.tid = cpu_to_le32(tid), + .u.remove.sta_mask = + cpu_to_le32(BIT(mvmsta->sta_id)), + }; + + ret = iwl_mvm_send_cmd_pdu(mvm, cmd_id, 0, + sizeof(remove_cmd), + &remove_cmd); + } + + iwl_trans_txq_free(mvm->trans, queue); + *queueptr = IWL_MVM_INVALID_QUEUE; + + return ret; +} + +/* Removes a sta from the FW table, disable its queues, and dealloc it + */ +static int iwl_mvm_mld_rm_int_sta(struct iwl_mvm *mvm, + struct iwl_mvm_int_sta *int_sta, + bool flush, u8 tid, u16 *queuptr) +{ + int ret; + + lockdep_assert_held(&mvm->mutex); + + if (WARN_ON_ONCE(int_sta->sta_id == IWL_MVM_INVALID_STA)) + return -EINVAL; + + if (flush) + iwl_mvm_flush_sta(mvm, int_sta, true); + + iwl_mvm_mld_disable_txq(mvm, NULL, queuptr, tid); + + ret = iwl_mvm_mld_rm_sta_from_fw(mvm, int_sta->sta_id); + if (ret) + IWL_WARN(mvm, "Failed sending remove station\n"); + + iwl_mvm_dealloc_int_sta(mvm, int_sta); + + return ret; +} + +int iwl_mvm_mld_rm_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) +{ + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + u16 *queueptr; + + lockdep_assert_held(&mvm->mutex); + + switch (vif->type) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_ADHOC: + queueptr = &mvm->probe_queue; + break; + case NL80211_IFTYPE_P2P_DEVICE: + queueptr = &mvm->p2p_dev_queue; + break; + default: + WARN(1, "Can't free bcast queue on vif type %d\n", + vif->type); + return -EINVAL; + } + + return iwl_mvm_mld_rm_int_sta(mvm, &mvmvif->bcast_sta, true, + IWL_MAX_TID_COUNT, queueptr); +} + +int iwl_mvm_mld_rm_snif_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) +{ + lockdep_assert_held(&mvm->mutex); + + return iwl_mvm_mld_rm_int_sta(mvm, 
&mvm->snif_sta, false, + IWL_MAX_TID_COUNT, &mvm->snif_queue); +} diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index c3767f62026a3..03da4e53de677 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -523,6 +523,10 @@ static const struct iwl_hcmd_names iwl_mvm_mac_conf_names[] = { HCMD_NAME(SESSION_PROTECTION_CMD), HCMD_NAME(MAC_CONFIG_CMD), HCMD_NAME(LINK_CONFIG_CMD), + HCMD_NAME(STA_CONFIG_CMD), + HCMD_NAME(AUX_STA_CMD), + HCMD_NAME(STA_REMOVE_CMD), + HCMD_NAME(STA_DISABLE_TX_CMD), HCMD_NAME(SESSION_PROTECTION_NOTIF), HCMD_NAME(CHANNEL_SWITCH_START_NOTIF), }; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 46af2b422849f..313c5416ae5e9 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -783,9 +783,9 @@ static int iwl_mvm_get_queue_size(struct ieee80211_sta *sta) return IWL_DEFAULT_QUEUE_SIZE; } -static int iwl_mvm_tvqm_enable_txq(struct iwl_mvm *mvm, - struct ieee80211_sta *sta, - u8 sta_id, u8 tid, unsigned int timeout) +int iwl_mvm_tvqm_enable_txq(struct iwl_mvm *mvm, + struct ieee80211_sta *sta, + u8 sta_id, u8 tid, unsigned int timeout) { int queue, size; @@ -2005,7 +2005,7 @@ int iwl_mvm_rm_sta_id(struct iwl_mvm *mvm, int iwl_mvm_allocate_int_sta(struct iwl_mvm *mvm, struct iwl_mvm_int_sta *sta, u32 qmask, enum nl80211_iftype iftype, - enum iwl_sta_type type) + u8 type) { if (!test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status) || sta->sta_id == IWL_MVM_INVALID_STA) { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h index f1a4fc3e40387..d11851b476842 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* - * Copyright (C) 2012-2014, 2018-2021 Intel Corporation + * Copyright (C) 2012-2014, 2018-2022 Intel Corporation * Copyright (C) 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015-2016 Intel Deutschland GmbH */ @@ -436,7 +436,7 @@ iwl_mvm_sta_from_mac80211(struct ieee80211_sta *sta) */ struct iwl_mvm_int_sta { u32 sta_id; - enum iwl_sta_type type; + u8 type; u32 tfd_queue_msk; }; @@ -519,7 +519,7 @@ int iwl_mvm_rm_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif); int iwl_mvm_allocate_int_sta(struct iwl_mvm *mvm, struct iwl_mvm_int_sta *sta, u32 qmask, enum nl80211_iftype iftype, - enum iwl_sta_type type); + u8 type); void iwl_mvm_dealloc_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif); void iwl_mvm_dealloc_int_sta(struct iwl_mvm *mvm, struct iwl_mvm_int_sta *sta); int iwl_mvm_add_snif_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif); @@ -551,4 +551,14 @@ int iwl_mvm_add_pasn_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif, void iwl_mvm_cancel_channel_switch(struct iwl_mvm *mvm, struct ieee80211_vif *vif, u32 mac_id); +/* Queues */ +int iwl_mvm_tvqm_enable_txq(struct iwl_mvm *mvm, + struct ieee80211_sta *sta, + u8 sta_id, u8 tid, unsigned int timeout); + +/* New MLD STA related APIs */ +int iwl_mvm_mld_add_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif); +int iwl_mvm_mld_add_snif_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif); +int iwl_mvm_mld_rm_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif); +int iwl_mvm_mld_rm_snif_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif); #endif /* __sta_h__ */ -- GitLab 
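As a usage illustration for the station commands added in the patch above (not
part of the patch itself), the sketch below shows how a STA_CONFIG_CMD /
STA_REMOVE_CMD pair composes for a peer station, assuming sta_id and link_id
were already allocated by the driver and "mvm.h" is included; the function name
is hypothetical, and the real peer-STA path is only added in later patches.
It mirrors what iwl_mvm_mld_add_int_sta_to_fw() and iwl_mvm_mld_rm_sta_from_fw()
do here for internal stations, only with STATION_TYPE_PEER instead of
STATION_TYPE_BCAST_MGMT:

	/* Illustrative sketch only */
	static int example_cfg_and_remove_sta(struct iwl_mvm *mvm, u32 sta_id,
					      u32 link_id, const u8 *mld_addr)
	{
		struct iwl_mvm_sta_cfg_cmd cfg = {
			.sta_id = cpu_to_le32(sta_id),
			.link_id = cpu_to_le32(link_id),
			.station_type = cpu_to_le32(STATION_TYPE_PEER),
		};
		struct iwl_mvm_remove_sta_cmd rm = {
			.sta_id = cpu_to_le32(sta_id),
		};
		int ret;

		memcpy(cfg.peer_mld_address, mld_addr, ETH_ALEN);
		memcpy(cfg.peer_link_address, mld_addr, ETH_ALEN);

		/* add the station to the FW table */
		ret = iwl_mvm_send_cmd_pdu(mvm,
					   WIDE_ID(MAC_CONF_GROUP, STA_CONFIG_CMD),
					   0, sizeof(cfg), &cfg);
		if (ret)
			return ret;

		/* ... traffic ... */

		/* remove it again */
		return iwl_mvm_send_cmd_pdu(mvm,
					    WIDE_ID(MAC_CONF_GROUP, STA_REMOVE_CMD),
					    0, sizeof(rm), &rm);
	}
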
From 1ab26632332eac61cc24464a74fd9bcf5ec5167b Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Tue, 14 Mar 2023 19:49:21 +0200 Subject: [PATCH 0460/2078] wifi: iwlwifi: mvm: Add an add_interface() callback for mld mode As the MLD mode and its new APIs are introduced, we've decided to add a new ieee80211_ops dedicated for MLD callbacks. Add the MLD add_interface() callback which uses the new MLD APIs added the previous patches. Signed-off-by: Miri Korenblit Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230314194113.6adc29bff39b.I97ed469028009be9392dcc6f7b5ffbe45f4b2c43@changeid Signed-off-by: Johannes Berg --- .../net/wireless/intel/iwlwifi/mvm/Makefile | 2 +- .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 85 ++++++++----- .../wireless/intel/iwlwifi/mvm/mld-mac80211.c | 118 ++++++++++++++++++ drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 8 ++ 4 files changed, 184 insertions(+), 29 deletions(-) create mode 100644 drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/Makefile b/drivers/net/wireless/intel/iwlwifi/mvm/Makefile index 0e9b5381e2659..b82f79ac53785 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/Makefile +++ b/drivers/net/wireless/intel/iwlwifi/mvm/Makefile @@ -7,7 +7,7 @@ iwlmvm-y += power.o coex.o iwlmvm-y += tt.o offloading.o tdls.o iwlmvm-y += ftm-responder.o ftm-initiator.o iwlmvm-y += rfi.o -iwlmvm-y += mld-key.o mld-mac.o link.o mld-sta.o +iwlmvm-y += mld-key.o mld-mac.o link.o mld-sta.o mld-mac80211.o iwlmvm-$(CONFIG_IWLWIFI_DEBUGFS) += debugfs.o debugfs-vif.o iwlmvm-$(CONFIG_IWLWIFI_LEDS) += led.o iwlmvm-$(CONFIG_PM) += d3.o diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index cf08cb834cc49..73b164aad86db 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -1216,7 +1216,7 @@ static void iwl_mvm_mac_stop(struct ieee80211_hw *hw) cancel_work_sync(&mvm->async_handlers_wk); } -static struct iwl_mvm_phy_ctxt *iwl_mvm_get_free_phy_ctxt(struct iwl_mvm *mvm) +struct iwl_mvm_phy_ctxt *iwl_mvm_get_free_phy_ctxt(struct iwl_mvm *mvm) { u16 i; @@ -1350,7 +1350,7 @@ static void iwl_mvm_abort_channel_switch(struct ieee80211_hw *hw, iwl_mvm_post_channel_switch(hw, vif); } -static void iwl_mvm_channel_switch_disconnect_wk(struct work_struct *wk) +void iwl_mvm_channel_switch_disconnect_wk(struct work_struct *wk) { struct iwl_mvm_vif *mvmvif; struct ieee80211_vif *vif; @@ -1384,12 +1384,17 @@ iwl_mvm_chandef_get_primary_80(struct cfg80211_chan_def *chandef) return (control_start - data_start) / 80; } -static int iwl_mvm_mac_add_interface(struct ieee80211_hw *hw, - struct ieee80211_vif *vif) +/* + * Returns true if addding the interface is done + * (either with success or failure) + */ +bool iwl_mvm_mac_add_interface_common(struct iwl_mvm *mvm, + struct ieee80211_hw *hw, + struct ieee80211_vif *vif, int *ret) { - struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); - int ret; + + lockdep_assert_held(&mvm->mutex); mvmvif->mvm = mvm; RCU_INIT_POINTER(mvmvif->probe_resp_data, NULL); @@ -1400,17 +1405,15 @@ static int iwl_mvm_mac_add_interface(struct ieee80211_hw *hw, * don't really have to check the types. 
*/ - mutex_lock(&mvm->mutex); - /* make sure that beacon statistics don't go backwards with FW reset */ if (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) mvmvif->beacon_stats.accu_num_beacons += mvmvif->beacon_stats.num_beacons; /* Allocate resources for the MAC context, and add it to the fw */ - ret = iwl_mvm_mac_ctxt_init(mvm, vif); - if (ret) - goto out_unlock; + *ret = iwl_mvm_mac_ctxt_init(mvm, vif); + if (*ret) + return true; rcu_assign_pointer(mvm->vif_id_to_mac[mvmvif->id], vif); @@ -1427,27 +1430,48 @@ static int iwl_mvm_mac_add_interface(struct ieee80211_hw *hw, */ if (vif->type == NL80211_IFTYPE_AP || vif->type == NL80211_IFTYPE_ADHOC) { - ret = iwl_mvm_alloc_bcast_sta(mvm, vif); - if (ret) { - IWL_ERR(mvm, "Failed to allocate bcast sta\n"); - goto out_unlock; - } - - /* - * Only queue for this station is the mcast queue, - * which shouldn't be in TFD mask anyway - */ - ret = iwl_mvm_allocate_int_sta(mvm, &mvmvif->mcast_sta, - 0, vif->type, - IWL_STA_MULTICAST); - if (ret) - goto out_unlock; - iwl_mvm_vif_dbgfs_register(mvm, vif); - goto out_unlock; + return true; } mvmvif->features |= hw->netdev_features; + return false; +} + +static int iwl_mvm_alloc_bcast_mcast_sta(struct iwl_mvm *mvm, + struct ieee80211_vif *vif) +{ + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + int ret; + + lockdep_assert_held(&mvm->mutex); + + ret = iwl_mvm_alloc_bcast_sta(mvm, vif); + if (ret) { + IWL_ERR(mvm, "Failed to allocate bcast sta\n"); + return ret; + } + + /* + * Only queue for this station is the mcast queue, + * which shouldn't be in TFD mask anyway + */ + return iwl_mvm_allocate_int_sta(mvm, &mvmvif->mcast_sta, 0, vif->type, + IWL_STA_MULTICAST); +} + +static int iwl_mvm_mac_add_interface(struct ieee80211_hw *hw, + struct ieee80211_vif *vif) +{ + struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + int ret; + + mutex_lock(&mvm->mutex); + + /* Common for MLD and non-MLD API */ + if (iwl_mvm_mac_add_interface_common(mvm, hw, vif, &ret)) + goto out; ret = iwl_mvm_mac_ctxt_add(mvm, vif); if (ret) @@ -1516,6 +1540,11 @@ static int iwl_mvm_mac_add_interface(struct ieee80211_hw *hw, mvm->csme_vif = vif; } +out: + if (!ret && (vif->type == NL80211_IFTYPE_AP || + vif->type == NL80211_IFTYPE_ADHOC)) + ret = iwl_mvm_alloc_bcast_mcast_sta(mvm, vif); + goto out_unlock; out_unbind: diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c new file mode 100644 index 0000000000000..b233bdd68b37d --- /dev/null +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +/* + * Copyright (C) 2022 Intel Corporation + */ +#include "mvm.h" + +static int iwl_mvm_mld_mac_add_interface(struct ieee80211_hw *hw, + struct ieee80211_vif *vif) +{ + struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + int ret; + + mutex_lock(&mvm->mutex); + + /* Common for MLD and non-MLD API */ + if (iwl_mvm_mac_add_interface_common(mvm, hw, vif, &ret)) + goto out_unlock; + + ret = iwl_mvm_mld_mac_ctxt_add(mvm, vif); + if (ret) + goto out_unlock; + + ret = iwl_mvm_power_update_mac(mvm); + if (ret) + goto out_remove_mac; + + /* beacon filtering */ + ret = iwl_mvm_disable_beacon_filter(mvm, vif, 0); + if (ret) + goto out_remove_mac; + + if (!mvm->bf_allowed_vif && + vif->type == NL80211_IFTYPE_STATION && !vif->p2p) { + mvm->bf_allowed_vif = mvmvif; + 
vif->driver_flags |= IEEE80211_VIF_BEACON_FILTER | + IEEE80211_VIF_SUPPORTS_CQM_RSSI; + } + + /* + * P2P_DEVICE interface does not have a channel context assigned to it, + * so a dedicated PHY context is allocated to it and the corresponding + * MAC context is bound to it at this stage. + */ + if (vif->type == NL80211_IFTYPE_P2P_DEVICE) { + mvmvif->phy_ctxt = iwl_mvm_get_free_phy_ctxt(mvm); + if (!mvmvif->phy_ctxt) { + ret = -ENOSPC; + goto out_free_bf; + } + + iwl_mvm_phy_ctxt_ref(mvm, mvmvif->phy_ctxt); + ret = iwl_mvm_add_link(mvm, vif); + if (ret) + goto out_unref_phy; + + ret = iwl_mvm_link_changed(mvm, vif, + LINK_CONTEXT_MODIFY_ACTIVE | + LINK_CONTEXT_MODIFY_RATES_INFO, + true); + if (ret) + goto out_remove_link; + + ret = iwl_mvm_mld_add_bcast_sta(mvm, vif); + if (ret) + goto out_remove_link; + + /* Save a pointer to p2p device vif, so it can later be used to + * update the p2p device MAC when a GO is started/stopped + */ + mvm->p2p_device_vif = vif; + } + + iwl_mvm_tcm_add_vif(mvm, vif); + INIT_DELAYED_WORK(&mvmvif->csa_work, + iwl_mvm_channel_switch_disconnect_wk); + + if (vif->type == NL80211_IFTYPE_MONITOR) { + mvm->monitor_on = true; + ieee80211_hw_set(mvm->hw, RX_INCLUDES_FCS); + } + + iwl_mvm_vif_dbgfs_register(mvm, vif); + + if (!test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status) && + vif->type == NL80211_IFTYPE_STATION && !vif->p2p && + !mvm->csme_vif && mvm->mei_registered) { + iwl_mei_set_nic_info(vif->addr, mvm->nvm_data->hw_addr); + iwl_mei_set_netdev(ieee80211_vif_to_wdev(vif)->netdev); + mvm->csme_vif = vif; + } + + goto out_unlock; + + out_remove_link: + /* Link needs to be deactivated before removal */ + iwl_mvm_link_changed(mvm, vif, LINK_CONTEXT_MODIFY_ACTIVE, false); + iwl_mvm_remove_link(mvm, vif); + out_unref_phy: + iwl_mvm_phy_ctxt_unref(mvm, mvmvif->phy_ctxt); + out_free_bf: + if (mvm->bf_allowed_vif == mvmvif) { + mvm->bf_allowed_vif = NULL; + vif->driver_flags &= ~(IEEE80211_VIF_BEACON_FILTER | + IEEE80211_VIF_SUPPORTS_CQM_RSSI); + } + out_remove_mac: + mvmvif->phy_ctxt = NULL; + iwl_mvm_mld_mac_ctxt_remove(mvm, vif); + out_unlock: + mutex_unlock(&mvm->mutex); + + return ret; +} + +const struct ieee80211_ops iwl_mvm_mld_hw_ops = { + .add_interface = iwl_mvm_mld_mac_add_interface, +}; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 4663cb7a0b102..50230a9dbcc8c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -72,6 +72,7 @@ #define IWL_MVM_OFFCHANNEL_QUEUE 0 extern const struct ieee80211_ops iwl_mvm_hw_ops; +extern const struct ieee80211_ops iwl_mvm_mld_hw_ops; /** * struct iwl_mvm_mod_params - module parameters for iwlmvm @@ -1624,6 +1625,7 @@ void iwl_mvm_rx_shared_mem_cfg_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb); /* MVM PHY */ +struct iwl_mvm_phy_ctxt *iwl_mvm_get_free_phy_ctxt(struct iwl_mvm *mvm); int iwl_mvm_phy_ctxt_add(struct iwl_mvm *mvm, struct iwl_mvm_phy_ctxt *ctxt, struct cfg80211_chan_def *chandef, u8 chains_static, u8 chains_dynamic); @@ -1640,6 +1642,9 @@ u8 iwl_mvm_get_ctrl_pos(struct cfg80211_chan_def *chandef); /* MAC (virtual interface) programming */ +bool iwl_mvm_mac_add_interface_common(struct iwl_mvm *mvm, + struct ieee80211_hw *hw, + struct ieee80211_vif *vif, int *ret); void iwl_mvm_set_fw_basic_rates(struct iwl_mvm *mvm, struct ieee80211_vif *vif, __le32 *cck_rates, __le32 *ofdm_rates); void iwl_mvm_set_fw_protection_flags(struct iwl_mvm *mvm, @@ -2158,6 +2163,9 @@ static inline u8 
iwl_mvm_phy_band_from_nl80211(enum nl80211_band band) } } +/* Channel Switch */ +void iwl_mvm_channel_switch_disconnect_wk(struct work_struct *wk); + /* Channel info utils */ static inline bool iwl_mvm_has_ultra_hb_channel(struct iwl_mvm *mvm) { -- GitLab From 60efeca1c6a75d7f518c963fa4f3694dcfc1147e Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Tue, 14 Mar 2023 19:49:22 +0200 Subject: [PATCH 0461/2078] wifi: iwlwifi: mvm: Add a remove_interface() callback for mld mode As the MLD mode and its new APIs are introduced, we've decided to add a new ieee80211_ops dedicated for MLD callbacks. Add the MLD remove_interface() callback which uses the new MLD APIs. Signed-off-by: Miri Korenblit Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230314194113.b87c5c0a4b6b.I631173a73d6ffd7232aa539ea8b356a222fac398@changeid Signed-off-by: Johannes Berg --- .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 32 +++++++++++++---- .../wireless/intel/iwlwifi/mvm/mld-mac80211.c | 34 +++++++++++++++++++ drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 2 ++ drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 4 +-- drivers/net/wireless/intel/iwlwifi/mvm/sta.h | 2 ++ 5 files changed, 66 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 73b164aad86db..bdec5ae5e83a0 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -1579,7 +1579,11 @@ static void iwl_mvm_prepare_mac_removal(struct iwl_mvm *mvm, } } -static void iwl_mvm_mac_remove_interface(struct ieee80211_hw *hw, +/* This function is doing the common part of removing the interface for + * both - MLD and non-MLD modes. Returns true if removing the interface + * is done + */ +bool iwl_mvm_mac_remove_interface_common(struct ieee80211_hw *hw, struct ieee80211_vif *vif) { struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); @@ -1628,11 +1632,22 @@ static void iwl_mvm_mac_remove_interface(struct ieee80211_hw *hw, mvm->noa_duration = 0; } #endif - iwl_mvm_dealloc_int_sta(mvm, &mvmvif->mcast_sta); - iwl_mvm_dealloc_bcast_sta(mvm, vif); - goto out_release; + return true; } + iwl_mvm_power_update_mac(mvm); + return false; +} + +static void iwl_mvm_mac_remove_interface(struct ieee80211_hw *hw, + struct ieee80211_vif *vif) +{ + struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + + if (iwl_mvm_mac_remove_interface_common(hw, vif)) + goto out; + if (vif->type == NL80211_IFTYPE_P2P_DEVICE) { mvm->p2p_device_vif = NULL; iwl_mvm_rm_p2p_bcast_sta(mvm, vif); @@ -1641,7 +1656,6 @@ static void iwl_mvm_mac_remove_interface(struct ieee80211_hw *hw, mvmvif->phy_ctxt = NULL; } - iwl_mvm_power_update_mac(mvm); iwl_mvm_mac_ctxt_remove(mvm, vif); RCU_INIT_POINTER(mvm->vif_id_to_mac[mvmvif->id], NULL); @@ -1649,7 +1663,13 @@ static void iwl_mvm_mac_remove_interface(struct ieee80211_hw *hw, if (vif->type == NL80211_IFTYPE_MONITOR) mvm->monitor_on = false; -out_release: +out: + if (vif->type == NL80211_IFTYPE_AP || + vif->type == NL80211_IFTYPE_ADHOC) { + iwl_mvm_dealloc_int_sta(mvm, &mvmvif->mcast_sta); + iwl_mvm_dealloc_bcast_sta(mvm, vif); + } + mutex_unlock(&mvm->mutex); } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c index b233bdd68b37d..8dca72ec55cb4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c @@ 
-113,6 +113,40 @@ static int iwl_mvm_mld_mac_add_interface(struct ieee80211_hw *hw, return ret; } +static void iwl_mvm_mld_mac_remove_interface(struct ieee80211_hw *hw, + struct ieee80211_vif *vif) +{ + struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + + if (iwl_mvm_mac_remove_interface_common(hw, vif)) + goto out; + + if (vif->type == NL80211_IFTYPE_P2P_DEVICE) { + mvm->p2p_device_vif = NULL; + iwl_mvm_mld_rm_bcast_sta(mvm, vif); + /* Link needs to be deactivated before removal */ + iwl_mvm_link_changed(mvm, vif, LINK_CONTEXT_MODIFY_ACTIVE, + false); + iwl_mvm_remove_link(mvm, vif); + iwl_mvm_phy_ctxt_unref(mvm, mvmvif->phy_ctxt); + mvmvif->phy_ctxt = NULL; + } + + iwl_mvm_mld_mac_ctxt_remove(mvm, vif); + + RCU_INIT_POINTER(mvm->vif_id_to_mac[mvmvif->id], NULL); + + if (vif->type == NL80211_IFTYPE_MONITOR) { + mvm->monitor_on = false; + __clear_bit(IEEE80211_HW_RX_INCLUDES_FCS, mvm->hw->flags); + } + +out: + mutex_unlock(&mvm->mutex); +} + const struct ieee80211_ops iwl_mvm_mld_hw_ops = { .add_interface = iwl_mvm_mld_mac_add_interface, + .remove_interface = iwl_mvm_mld_mac_remove_interface, }; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 50230a9dbcc8c..32e0af87b5286 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -1645,6 +1645,8 @@ u8 iwl_mvm_get_ctrl_pos(struct cfg80211_chan_def *chandef); bool iwl_mvm_mac_add_interface_common(struct iwl_mvm *mvm, struct ieee80211_hw *hw, struct ieee80211_vif *vif, int *ret); +bool iwl_mvm_mac_remove_interface_common(struct ieee80211_hw *hw, + struct ieee80211_vif *vif); void iwl_mvm_set_fw_basic_rates(struct iwl_mvm *mvm, struct ieee80211_vif *vif, __le32 *cck_rates, __le32 *ofdm_rates); void iwl_mvm_set_fw_protection_flags(struct iwl_mvm *mvm, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 313c5416ae5e9..bbcc7459f3a07 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -2256,8 +2256,8 @@ int iwl_mvm_send_add_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) return 0; } -static void iwl_mvm_free_bcast_sta_queues(struct iwl_mvm *mvm, - struct ieee80211_vif *vif) +void iwl_mvm_free_bcast_sta_queues(struct iwl_mvm *mvm, + struct ieee80211_vif *vif) { struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); u16 *queueptr, queue; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h index d11851b476842..cad6e879c9996 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h @@ -510,6 +510,8 @@ int iwl_mvm_add_aux_sta(struct iwl_mvm *mvm, u32 lmac_id); int iwl_mvm_rm_aux_sta(struct iwl_mvm *mvm); int iwl_mvm_alloc_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif); +void iwl_mvm_free_bcast_sta_queues(struct iwl_mvm *mvm, + struct ieee80211_vif *vif); int iwl_mvm_send_add_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif); int iwl_mvm_add_p2p_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif); int iwl_mvm_send_rm_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif); -- GitLab From 8a919a78a469bb4e01ced9766bdbb7a03c8a4edf Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Tue, 14 Mar 2023 19:49:23 +0200 Subject: [PATCH 0462/2078] wifi: iwlwifi: mvm: refactor __iwl_mvm_assign_vif_chanctx() Since parts of the 
functionality of this function is going to be used also by the MLD version of it, put in a separate function the parts that are common for both MLD and non-MLD modes. The common function will later be used in the MLD ops. Signed-off-by: Miri Korenblit Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230314194113.844755701cac.I1c650718ad2381eabc38f4103c1aac67936a1ffc@changeid Signed-off-by: Johannes Berg --- .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 47 ++++++++++++++----- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index bdec5ae5e83a0..8123dad3b710c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -4405,15 +4405,21 @@ out_unlock: mutex_unlock(&mvm->mutex); } -static int __iwl_mvm_assign_vif_chanctx(struct iwl_mvm *mvm, - struct ieee80211_vif *vif, - struct ieee80211_chanctx_conf *ctx, - bool switching_chanctx) +/* + * This function executes the common part for MLD and non-MLD modes. + * + * Returns true if we're done assigning the chanctx + * (either on failure or success) + */ +static bool __iwl_mvm_assign_vif_chanctx_common(struct iwl_mvm *mvm, + struct ieee80211_vif *vif, + struct ieee80211_chanctx_conf *ctx, + bool switching_chanctx, + int *ret) { u16 *phy_ctxt_id = (u16 *)ctx->drv_priv; struct iwl_mvm_phy_ctxt *phy_ctxt = &mvm->phy_ctxts[*phy_ctxt_id]; struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); - int ret; lockdep_assert_held(&mvm->mutex); @@ -4432,19 +4438,32 @@ static int __iwl_mvm_assign_vif_chanctx(struct iwl_mvm *mvm, * The AP binding flow is handled as part of the start_ap flow * (in bss_info_changed), similarly for IBSS. */ - ret = 0; - goto out; + *ret = 0; + return true; case NL80211_IFTYPE_STATION: - mvmvif->csa_bcn_pending = false; break; case NL80211_IFTYPE_MONITOR: /* always disable PS when a monitor interface is active */ mvmvif->ps_disabled = true; break; default: - ret = -EINVAL; - goto out; + *ret = -EINVAL; + return true; } + return false; +} + +static int __iwl_mvm_assign_vif_chanctx(struct iwl_mvm *mvm, + struct ieee80211_vif *vif, + struct ieee80211_chanctx_conf *ctx, + bool switching_chanctx) +{ + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + int ret; + + if (__iwl_mvm_assign_vif_chanctx_common(mvm, vif, ctx, + switching_chanctx, &ret)) + goto out; ret = iwl_mvm_binding_add_vif(mvm, vif); if (ret) @@ -4478,7 +4497,12 @@ static int __iwl_mvm_assign_vif_chanctx(struct iwl_mvm *mvm, iwl_mvm_mac_ctxt_changed(mvm, vif, false, NULL); } - if (switching_chanctx && vif->type == NL80211_IFTYPE_STATION) { + if (vif->type == NL80211_IFTYPE_STATION) { + if (!switching_chanctx) { + mvmvif->csa_bcn_pending = false; + goto out; + } + mvmvif->csa_bcn_pending = true; if (!fw_has_capa(&mvm->fw->ucode_capa, @@ -4506,6 +4530,7 @@ out: mvmvif->phy_ctxt = NULL; return ret; } + static int iwl_mvm_assign_vif_chanctx(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_bss_conf *link_conf, -- GitLab From 50e81437a4831e32b0d04a828aa18c02356ee23e Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Tue, 14 Mar 2023 19:49:24 +0200 Subject: [PATCH 0463/2078] wifi: iwlwifi: mvm: add an assign_vif_chanctx() callback for MLD mode This is another patch in the series adding all the ops for the new MLD ieee80211_ops. The callback added here uses the new MLD FW API. 
Signed-off-by: Miri Korenblit Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230314194113.d3753975e720.I45f89cc81370d2cf8d4f51748ccb3ec675eff1bd@changeid Signed-off-by: Johannes Berg --- .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 9 ++- .../wireless/intel/iwlwifi/mvm/mld-mac80211.c | 60 +++++++++++++++++++ drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 6 ++ 3 files changed, 70 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 8123dad3b710c..d4a3d9259150e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -4411,11 +4411,10 @@ out_unlock: * Returns true if we're done assigning the chanctx * (either on failure or success) */ -static bool __iwl_mvm_assign_vif_chanctx_common(struct iwl_mvm *mvm, - struct ieee80211_vif *vif, - struct ieee80211_chanctx_conf *ctx, - bool switching_chanctx, - int *ret) +bool __iwl_mvm_assign_vif_chanctx_common(struct iwl_mvm *mvm, + struct ieee80211_vif *vif, + struct ieee80211_chanctx_conf *ctx, + bool switching_chanctx, int *ret) { u16 *phy_ctxt_id = (u16 *)ctx->drv_priv; struct iwl_mvm_phy_ctxt *phy_ctxt = &mvm->phy_ctxts[*phy_ctxt_id]; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c index 8dca72ec55cb4..81f88d1b0feb5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c @@ -146,7 +146,67 @@ out: mutex_unlock(&mvm->mutex); } +static int __iwl_mvm_mld_assign_vif_chanctx(struct iwl_mvm *mvm, + struct ieee80211_vif *vif, + struct ieee80211_chanctx_conf *ctx, + bool switching_chanctx) +{ + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + int ret; + + if (__iwl_mvm_assign_vif_chanctx_common(mvm, vif, ctx, + switching_chanctx, &ret)) + goto out; + + ret = iwl_mvm_add_link(mvm, vif); + if (ret) + goto out; + ret = iwl_mvm_link_changed(mvm, vif, LINK_CONTEXT_MODIFY_ACTIVE, + true); + if (ret) + goto out_remove_link; + + /* + * Power state must be updated before quotas, + * otherwise fw will complain. 
+ */ + iwl_mvm_power_update_mac(mvm); + + if (vif->type == NL80211_IFTYPE_MONITOR) { + ret = iwl_mvm_mld_add_snif_sta(mvm, vif); + if (ret) + goto out_remove_link; + } + + goto out; + +out_remove_link: + /* Link needs to be deactivated before removal */ + iwl_mvm_link_changed(mvm, vif, LINK_CONTEXT_MODIFY_ACTIVE, false); + iwl_mvm_remove_link(mvm, vif); + iwl_mvm_power_update_mac(mvm); +out: + if (ret) + mvmvif->phy_ctxt = NULL; + return ret; +} + +static int iwl_mvm_mld_assign_vif_chanctx(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_bss_conf *link_conf, + struct ieee80211_chanctx_conf *ctx) +{ + struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); + int ret; + + mutex_lock(&mvm->mutex); + ret = __iwl_mvm_mld_assign_vif_chanctx(mvm, vif, ctx, false); + mutex_unlock(&mvm->mutex); + + return ret; +} const struct ieee80211_ops iwl_mvm_mld_hw_ops = { .add_interface = iwl_mvm_mld_mac_add_interface, .remove_interface = iwl_mvm_mld_mac_remove_interface, + .assign_vif_chanctx = iwl_mvm_mld_assign_vif_chanctx, }; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 32e0af87b5286..1221a0717e61d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -2168,6 +2168,12 @@ static inline u8 iwl_mvm_phy_band_from_nl80211(enum nl80211_band band) /* Channel Switch */ void iwl_mvm_channel_switch_disconnect_wk(struct work_struct *wk); +/* Channel Context */ +bool __iwl_mvm_assign_vif_chanctx_common(struct iwl_mvm *mvm, + struct ieee80211_vif *vif, + struct ieee80211_chanctx_conf *ctx, + bool switching_chanctx, int *ret); + /* Channel info utils */ static inline bool iwl_mvm_has_ultra_hb_channel(struct iwl_mvm *mvm) { -- GitLab From daddfae5527164edee09bcca0848c10ab72c5c5b Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Tue, 14 Mar 2023 19:49:25 +0200 Subject: [PATCH 0464/2078] wifi: iwlwifi: mvm: refactor __iwl_mvm_unassign_vif_chanctx() Since parts of the functionality of this function is going to be used also by the MLD version of it, put in a separate function the parts that are common for both MLD and non-MLD modes. The common function will later be used in the MLD ops. Signed-off-by: Miri Korenblit Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230314194113.fa05929badb9.I2222dc86cf7d7a7bb58c6a2f2529c8089bfe58b4@changeid Signed-off-by: Johannes Berg --- .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 53 ++++++++++++------- 1 file changed, 35 insertions(+), 18 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index d4a3d9259150e..c006d68a785ea 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -4545,52 +4545,69 @@ static int iwl_mvm_assign_vif_chanctx(struct ieee80211_hw *hw, return ret; } -static void __iwl_mvm_unassign_vif_chanctx(struct iwl_mvm *mvm, - struct ieee80211_vif *vif, - struct ieee80211_chanctx_conf *ctx, - bool switching_chanctx) +/* + * This function executes the common part for MLD and non-MLD modes. 
+ * + * Returns if chanctx unassign chanctx is done + * (either on failure or success) + */ +static bool __iwl_mvm_unassign_vif_chanctx_common(struct iwl_mvm *mvm, + struct ieee80211_vif *vif, + bool switching_chanctx) { struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); - struct ieee80211_vif *disabled_vif = NULL; lockdep_assert_held(&mvm->mutex); iwl_mvm_remove_time_event(mvm, mvmvif, &mvmvif->time_event_data); switch (vif->type) { case NL80211_IFTYPE_ADHOC: - goto out; + return true; case NL80211_IFTYPE_MONITOR: mvmvif->monitor_active = false; mvmvif->ps_disabled = false; - iwl_mvm_rm_snif_sta(mvm, vif); break; case NL80211_IFTYPE_AP: /* This part is triggered only during CSA */ if (!switching_chanctx || !mvmvif->ap_ibss_active) - goto out; + return true; mvmvif->csa_countdown = false; - /* Set CS bit on all the stations */ - iwl_mvm_modify_all_sta_disable_tx(mvm, mvmvif, true); - /* Save blocked iface, the timeout is set on the next beacon */ rcu_assign_pointer(mvm->csa_tx_blocked_vif, vif); mvmvif->ap_ibss_active = false; break; - case NL80211_IFTYPE_STATION: - if (!switching_chanctx) - break; + default: + break; + } + return false; +} - disabled_vif = vif; +static void __iwl_mvm_unassign_vif_chanctx(struct iwl_mvm *mvm, + struct ieee80211_vif *vif, + struct ieee80211_chanctx_conf *ctx, + bool switching_chanctx) +{ + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + struct ieee80211_vif *disabled_vif = NULL; + + if (__iwl_mvm_unassign_vif_chanctx_common(mvm, vif, switching_chanctx)) + goto out; + if (vif->type == NL80211_IFTYPE_MONITOR) + iwl_mvm_rm_snif_sta(mvm, vif); + + if (vif->type == NL80211_IFTYPE_AP) + /* Set CS bit on all the stations */ + iwl_mvm_modify_all_sta_disable_tx(mvm, mvmvif, true); + + if (vif->type == NL80211_IFTYPE_STATION && switching_chanctx) { + disabled_vif = vif; if (!fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_CHANNEL_SWITCH_CMD)) iwl_mvm_mac_ctxt_changed(mvm, vif, true, NULL); - break; - default: - break; } iwl_mvm_update_quotas(mvm, false, disabled_vif); -- GitLab From 6f71e90e65e9bb5d5e12199e19670bb4973a6e39 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Tue, 14 Mar 2023 19:49:26 +0200 Subject: [PATCH 0465/2078] wifi: iwlwifi: mvm: add an unassign_vif_chanctx() callback for MLD mode This is another patch in the series adding all the ops for the new MLD ieee80211_ops. 
The callback added here uses the new MLD FW API Signed-off-by: Miri Korenblit Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230314194113.5d4bcd384425.I263eef3aad8efe23a597843fe7c56924038c8fdc@changeid Signed-off-by: Johannes Berg --- .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 6 +- .../wireless/intel/iwlwifi/mvm/mld-mac80211.c | 41 +++++++++++++ .../net/wireless/intel/iwlwifi/mvm/mld-sta.c | 58 +++++++++++++++++++ drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 3 + drivers/net/wireless/intel/iwlwifi/mvm/sta.h | 7 +++ 5 files changed, 112 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index c006d68a785ea..93ca92e8eb98b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -4551,9 +4551,9 @@ static int iwl_mvm_assign_vif_chanctx(struct ieee80211_hw *hw, * Returns if chanctx unassign chanctx is done * (either on failure or success) */ -static bool __iwl_mvm_unassign_vif_chanctx_common(struct iwl_mvm *mvm, - struct ieee80211_vif *vif, - bool switching_chanctx) +bool __iwl_mvm_unassign_vif_chanctx_common(struct iwl_mvm *mvm, + struct ieee80211_vif *vif, + bool switching_chanctx) { struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c index 81f88d1b0feb5..f402e235525cf 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c @@ -205,8 +205,49 @@ static int iwl_mvm_mld_assign_vif_chanctx(struct ieee80211_hw *hw, return ret; } + +static void __iwl_mvm_mld_unassign_vif_chanctx(struct iwl_mvm *mvm, + struct ieee80211_vif *vif, + struct ieee80211_chanctx_conf *ctx, + bool switching_chanctx) +{ + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + + if (__iwl_mvm_unassign_vif_chanctx_common(mvm, vif, switching_chanctx)) + goto out; + + if (vif->type == NL80211_IFTYPE_MONITOR) + iwl_mvm_mld_rm_snif_sta(mvm, vif); + + if (vif->type == NL80211_IFTYPE_AP) + /* Set CS bit on all the stations */ + iwl_mvm_mld_modify_all_sta_disable_tx(mvm, mvmvif, true); + + /* Link needs to be deactivated before removal */ + iwl_mvm_link_changed(mvm, vif, LINK_CONTEXT_MODIFY_ACTIVE, false); + iwl_mvm_remove_link(mvm, vif); + +out: + if (switching_chanctx) + return; + mvmvif->phy_ctxt = NULL; + iwl_mvm_power_update_mac(mvm); +} + +static void iwl_mvm_mld_unassign_vif_chanctx(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_bss_conf *link_conf, + struct ieee80211_chanctx_conf *ctx) +{ + struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); + + mutex_lock(&mvm->mutex); + __iwl_mvm_mld_unassign_vif_chanctx(mvm, vif, ctx, false); + mutex_unlock(&mvm->mutex); +} const struct ieee80211_ops iwl_mvm_mld_hw_ops = { .add_interface = iwl_mvm_mld_mac_add_interface, .remove_interface = iwl_mvm_mld_mac_remove_interface, .assign_vif_chanctx = iwl_mvm_mld_assign_vif_chanctx, + .unassign_vif_chanctx = iwl_mvm_mld_unassign_vif_chanctx, }; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c index cef4fc441afe6..4ee6f9b250d3f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c @@ -279,3 +279,61 @@ int iwl_mvm_mld_rm_snif_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) return iwl_mvm_mld_rm_int_sta(mvm, 
&mvm->snif_sta, false, IWL_MAX_TID_COUNT, &mvm->snif_queue); } + +static void iwl_mvm_mld_sta_modify_disable_tx(struct iwl_mvm *mvm, + struct ieee80211_sta *sta, + bool disable) +{ + struct iwl_mvm_sta *mvm_sta = iwl_mvm_sta_from_mac80211(sta); + struct iwl_mvm_sta_disable_tx_cmd cmd; + int ret; + + spin_lock_bh(&mvm_sta->lock); + + if (mvm_sta->disable_tx == disable) { + spin_unlock_bh(&mvm_sta->lock); + return; + } + + mvm_sta->disable_tx = disable; + + cmd.sta_id = cpu_to_le32(mvm_sta->sta_id); + cmd.disable = cpu_to_le32(disable); + + ret = iwl_mvm_send_cmd_pdu(mvm, + WIDE_ID(MAC_CONF_GROUP, STA_DISABLE_TX_CMD), + CMD_ASYNC, sizeof(cmd), &cmd); + if (ret) + IWL_ERR(mvm, + "Failed to send STA_DISABLE_TX_CMD command (%d)\n", + ret); + + spin_unlock_bh(&mvm_sta->lock); +} + +void iwl_mvm_mld_modify_all_sta_disable_tx(struct iwl_mvm *mvm, + struct iwl_mvm_vif *mvmvif, + bool disable) +{ + struct ieee80211_sta *sta; + struct iwl_mvm_sta *mvm_sta; + int i; + + rcu_read_lock(); + + /* Block/unblock all the stations of the given mvmvif */ + for (i = 0; i < mvm->fw->ucode_capa.num_stations; i++) { + sta = rcu_dereference(mvm->fw_id_to_mac_id[i]); + if (IS_ERR_OR_NULL(sta)) + continue; + + mvm_sta = iwl_mvm_sta_from_mac80211(sta); + if (mvm_sta->mac_id_n_color != + FW_CMD_ID_AND_COLOR(mvmvif->id, mvmvif->color)) + continue; + + iwl_mvm_mld_sta_modify_disable_tx(mvm, sta, disable); + } + + rcu_read_unlock(); +} diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 1221a0717e61d..7f73179e765e5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -2173,6 +2173,9 @@ bool __iwl_mvm_assign_vif_chanctx_common(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct ieee80211_chanctx_conf *ctx, bool switching_chanctx, int *ret); +bool __iwl_mvm_unassign_vif_chanctx_common(struct iwl_mvm *mvm, + struct ieee80211_vif *vif, + bool switching_chanctx); /* Channel info utils */ static inline bool iwl_mvm_has_ultra_hb_channel(struct iwl_mvm *mvm) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h index cad6e879c9996..f0d22e9a117bf 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h @@ -545,6 +545,7 @@ void iwl_mvm_sta_modify_disable_tx_ap(struct iwl_mvm *mvm, void iwl_mvm_modify_all_sta_disable_tx(struct iwl_mvm *mvm, struct iwl_mvm_vif *mvmvif, bool disable); + void iwl_mvm_csa_client_absent(struct iwl_mvm *mvm, struct ieee80211_vif *vif); void iwl_mvm_add_new_dqa_stream_wk(struct work_struct *wk); int iwl_mvm_add_pasn_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif, @@ -559,8 +560,14 @@ int iwl_mvm_tvqm_enable_txq(struct iwl_mvm *mvm, u8 sta_id, u8 tid, unsigned int timeout); /* New MLD STA related APIs */ +/* STA */ int iwl_mvm_mld_add_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif); int iwl_mvm_mld_add_snif_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif); int iwl_mvm_mld_rm_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif); int iwl_mvm_mld_rm_snif_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif); + +/* Queues */ +void iwl_mvm_mld_modify_all_sta_disable_tx(struct iwl_mvm *mvm, + struct iwl_mvm_vif *mvmvif, + bool disable); #endif /* __sta_h__ */ -- GitLab From 093e71e26d67c2b0d194b34a7b56432bab049e90 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Tue, 14 Mar 2023 19:49:27 +0200 Subject: [PATCH 0466/2078] wifi: iwlwifi: mvm: remove setting of 'sta' parameter 
cppcheck reports [drivers/net/wireless/intel/iwlwifi/mvm/rs.c:2686]: (warning) Assignment of function parameter has no effect outside the function. Did you forget dereferencing it? The setting of the 'sta' parameter is not needed. In the if-check that sets it to NULL, mvm_sta is also set to NULL. Then the next statement checks if mvm_sta is NULL and does an early return. So remove setting sta. Signed-off-by: Tom Rix Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230314194113.711fe28cfdd6.I2f723f9d44f65720baaf3e84b72109759350a8f5@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/rs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c index 1f81dff71bc47..7c976b7f1cd74 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c @@ -2682,7 +2682,6 @@ static void rs_drv_get_rate(void *mvm_r, struct ieee80211_sta *sta, /* if vif isn't initialized mvm doesn't know about * this station, so don't do anything with the it */ - sta = NULL; mvm_sta = NULL; } -- GitLab From e2e76bdcd3fd663bfbddbc09e167e7c1069659cd Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 14 Mar 2023 19:49:28 +0200 Subject: [PATCH 0467/2078] wifi: iwlwifi: fix typos in comment Spelling mistakes (triple letters) in comment. Detected with the help of Coccinelle. Signed-off-by: Julia Lawall Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230314194113.fb2fc470e949.I1f3a347b533bcdb6bcd310f752ab3349800efb49@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/fw/img.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/img.h b/drivers/net/wireless/intel/iwlwifi/fw/img.h index f878ac5088017..f5c4d93d10331 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/img.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/img.h @@ -182,10 +182,10 @@ struct iwl_dump_exclude { * @enhance_sensitivity_table: device can do enhanced sensitivity. * @init_evtlog_ptr: event log offset for init ucode. * @init_evtlog_size: event log size for init ucode. - * @init_errlog_ptr: error log offfset for init ucode. + * @init_errlog_ptr: error log offset for init ucode. * @inst_evtlog_ptr: event log offset for runtime ucode. * @inst_evtlog_size: event log size for runtime ucode. - * @inst_errlog_ptr: error log offfset for runtime ucode. + * @inst_errlog_ptr: error log offset for runtime ucode. * @type: firmware type (&enum iwl_fw_type) * @human_readable: human readable version * we get the ALIVE from the uCode -- GitLab From d2abe692fe6bea753aaa38c081f322fbed5930e9 Mon Sep 17 00:00:00 2001 From: Solomon Tan Date: Tue, 14 Mar 2023 19:49:29 +0200 Subject: [PATCH 0468/2078] wifi: iwlwifi: Remove prohibited spaces This patch addresses the error from checkpatch.pl regarding the presence of prohibited spaces. 
Signed-off-by: Solomon Tan Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230314194113.557d6841a166.I0957923fe9ea20c1ec9140477033548ccfe93e9a@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index aa8e08487b52a..923bbfc151dd6 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -81,7 +81,7 @@ static const u16 iwl_nvm_channels[] = { /* 2.4 GHz */ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 5 GHz */ - 36, 40, 44 , 48, 52, 56, 60, 64, + 36, 40, 44, 48, 52, 56, 60, 64, 100, 104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 149, 153, 157, 161, 165 }; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c index 85b99316d029a..5790435dc69e8 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c @@ -214,9 +214,9 @@ static ssize_t iwl_dbgfs_set_nic_temperature_read(struct file *file, int pos; if (!mvm->temperature_test) - pos = scnprintf(buf , sizeof(buf), "disabled\n"); + pos = scnprintf(buf, sizeof(buf), "disabled\n"); else - pos = scnprintf(buf , sizeof(buf), "%d\n", mvm->temperature); + pos = scnprintf(buf, sizeof(buf), "%d\n", mvm->temperature); return simple_read_from_buffer(user_buf, count, ppos, buf, pos); } @@ -261,7 +261,7 @@ static ssize_t iwl_dbgfs_set_nic_temperature_write(struct iwl_mvm *mvm, mvm->temperature = temperature; } IWL_DEBUG_TEMP(mvm, "%sabling debug set temperature (temp = %d)\n", - mvm->temperature_test ? "En" : "Dis" , + mvm->temperature_test ? "En" : "Dis", mvm->temperature); /* handle the temperature change */ iwl_mvm_tt_handler(mvm); @@ -291,7 +291,7 @@ static ssize_t iwl_dbgfs_nic_temp_read(struct file *file, if (ret) return -EIO; - pos = scnprintf(buf , sizeof(buf), "%d\n", temp); + pos = scnprintf(buf, sizeof(buf), "%d\n", temp); return simple_read_from_buffer(user_buf, count, ppos, buf, pos); } -- GitLab From 473bc264356297b9df7ee3183b08b954be4a1152 Mon Sep 17 00:00:00 2001 From: Solomon Tan Date: Tue, 14 Mar 2023 19:49:30 +0200 Subject: [PATCH 0469/2078] wifi: iwlwifi: Add required space before open '(' This patch addresses the error from checkpatch.pl that a space is required before an open parenthesis. 
Signed-off-by: Solomon Tan Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230314194113.a21ba1967d94.Iaa52fc6517ea9efb3efc7b1b98f4df2a288de1c3@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 3e9e9f13506b6..fe7dfdcd531b2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -1037,7 +1037,7 @@ int iwl_mvm_ppag_send_cmd(struct iwl_mvm *mvm) ret = iwl_read_ppag_table(&mvm->fwrt, &cmd, &cmd_size); /* Not supporting PPAG table is a valid scenario */ - if(ret < 0) + if (ret < 0) return 0; IWL_DEBUG_RADIO(mvm, "Sending PER_PLATFORM_ANT_GAIN_CMD\n"); -- GitLab From f311d0113c7326ec411c9f81dbf77a2cc5ed00ff Mon Sep 17 00:00:00 2001 From: Solomon Tan Date: Tue, 14 Mar 2023 19:49:31 +0200 Subject: [PATCH 0470/2078] wifi: iwlwifi: Replace space with tabs as code indent This patch addresses the checkpatch.pl warning that code indent should use tabs. Signed-off-by: Solomon Tan Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230314194113.eaca2a9b08f2.Ic81d60185c13cfc750bf93fbabac57314cb5de13@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 7f73179e765e5..7cc3526e78ce1 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -1312,7 +1312,7 @@ static inline bool iwl_mvm_is_csum_supported(struct iwl_mvm *mvm) { return fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_CSUM_SUPPORT) && - !IWL_MVM_HW_CSUM_DISABLE; + !IWL_MVM_HW_CSUM_DISABLE; } static inline bool iwl_mvm_is_mplut_supported(struct iwl_mvm *mvm) -- GitLab From 71a54f7e74488f234fb56270253bcd08a3357d0e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 14 Mar 2023 19:49:32 +0200 Subject: [PATCH 0471/2078] wifi: iwlwifi: mvm: rs: print BAD_RATE for invalid HT/VHT index If there's a rate->index that maps inside the range but to an uninitialized value, then that's also a bad rate, avoid printing "(nil)" in that case and rather print the "BAD_RATE" string instead as in the else branch. 
Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230314194113.06b38d160fc5.I45a9254d3658b1ce796aa4c427193d3cbf638d7e@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/rs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c index 7c976b7f1cd74..bd135b7168cbc 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c @@ -512,10 +512,10 @@ static char *rs_pretty_rate(const struct rs_rate *rate) (rate->index <= IWL_RATE_MCS_9_INDEX)) rate_str = ht_vht_rates[rate->index]; else - rate_str = "BAD_RATE"; + rate_str = NULL; sprintf(buf, "(%s|%s|%s)", rs_pretty_lq_type(rate->type), - iwl_rs_pretty_ant(rate->ant), rate_str); + iwl_rs_pretty_ant(rate->ant), rate_str ?: "BAD_RATE"); return buf; } -- GitLab From c2db01752735b522ae150b8698fd4e526efbbc20 Mon Sep 17 00:00:00 2001 From: Mukesh Sisodiya Date: Tue, 14 Mar 2023 19:49:33 +0200 Subject: [PATCH 0472/2078] wifi: iwlwifi: Update configurations for Bnj and Bz devices Add missing rf support for Bz device and B-Step for BnJ device Signed-off-by: Mukesh Sisodiya Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230314194113.14c373dcfe15.I96b96a8b149ac181e962f4b82e0d15b172823d0f@changeid Signed-off-by: Johannes Berg --- .../net/wireless/intel/iwlwifi/cfg/22000.c | 36 +++++++++++++++++++ .../net/wireless/intel/iwlwifi/iwl-config.h | 3 ++ drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 15 ++++++++ 3 files changed, 54 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c index 43fb5cf85f05a..bc33d19ca08da 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c @@ -57,6 +57,8 @@ #define IWL_BZ_A_MR_A_FW_PRE "iwlwifi-bz-a0-mr-a0-" #define IWL_BZ_A_FM_A_FW_PRE "iwlwifi-bz-a0-fm-a0-" #define IWL_BZ_A_FM4_A_FW_PRE "iwlwifi-bz-a0-fm4-a0-" +#define IWL_BZ_A_FM_B_FW_PRE "iwlwifi-bz-a0-fm-b0-" +#define IWL_BZ_A_FM4_B_FW_PRE "iwlwifi-bz-a0-fm4-b0-" #define IWL_GL_A_FM_A_FW_PRE "iwlwifi-gl-a0-fm-a0-" #define IWL_GL_B_FM_B_FW_PRE "iwlwifi-gl-b0-fm-b0-" #define IWL_BZ_Z_GF_A_FW_PRE "iwlwifi-bz-z0-gf-a0-" @@ -64,6 +66,7 @@ #define IWL_BNJ_A_FM4_A_FW_PRE "iwlwifi-BzBnj-a0-fm4-a0-" #define IWL_BNJ_B_FM4_B_FW_PRE "iwlwifi-BzBnj-b0-fm4-b0-" #define IWL_BNJ_A_GF_A_FW_PRE "iwlwifi-BzBnj-a0-gf-a0-" +#define IWL_BNJ_B_GF_A_FW_PRE "iwlwifi-BzBnj-b0-gf-a0-" #define IWL_BNJ_A_GF4_A_FW_PRE "iwlwifi-BzBnj-a0-gf4-a0-" #define IWL_BNJ_A_HR_B_FW_PRE "iwlwifi-BzBnj-a0-hr-b0-" #define IWL_BNJ_B_FM_B_FW_PRE "iwlwifi-BzBnj-b0-fm-b0-" @@ -125,6 +128,10 @@ IWL_BZ_A_FM_A_FW_PRE __stringify(api) ".ucode" #define IWL_BZ_A_FM4_A_MODULE_FIRMWARE(api) \ IWL_BZ_A_FM4_A_FW_PRE __stringify(api) ".ucode" +#define IWL_BZ_A_FM_B_MODULE_FIRMWARE(api) \ + IWL_BZ_A_FM_B_FW_PRE __stringify(api) ".ucode" +#define IWL_BZ_A_FM4_B_MODULE_FIRMWARE(api) \ + IWL_BZ_A_FM4_B_FW_PRE __stringify(api) ".ucode" #define IWL_GL_A_FM_A_MODULE_FIRMWARE(api) \ IWL_GL_A_FM_A_FW_PRE __stringify(api) ".ucode" #define IWL_GL_B_FM_B_MODULE_FIRMWARE(api) \ @@ -137,6 +144,8 @@ IWL_BNJ_B_FM4_B_FW_PRE __stringify(api) ".ucode" #define IWL_BNJ_A_GF_A_MODULE_FIRMWARE(api) \ IWL_BNJ_A_GF_A_FW_PRE __stringify(api) ".ucode" +#define IWL_BNJ_B_GF_A_MODULE_FIRMWARE(api) \ + IWL_BNJ_B_GF_A_FW_PRE __stringify(api) ".ucode" #define IWL_BNJ_A_GF4_A_MODULE_FIRMWARE(api) \ 
IWL_BNJ_A_GF4_A_FW_PRE __stringify(api) ".ucode" #define IWL_BNJ_A_HR_B_MODULE_FIRMWARE(api) \ @@ -961,6 +970,22 @@ const struct iwl_cfg iwl_cfg_bz_a0_fm4_a0 = { .num_rbds = IWL_NUM_RBDS_AX210_HE, }; +const struct iwl_cfg iwl_cfg_bz_a0_fm_b0 = { + .fw_name_pre = IWL_BZ_A_FM_B_FW_PRE, + .uhb_supported = true, + IWL_DEVICE_BZ, + .features = IWL_TX_CSUM_NETIF_FLAGS_BZ | NETIF_F_RXCSUM, + .num_rbds = IWL_NUM_RBDS_AX210_HE, +}; + +const struct iwl_cfg iwl_cfg_bz_a0_fm4_b0 = { + .fw_name_pre = IWL_BZ_A_FM4_B_FW_PRE, + .uhb_supported = true, + IWL_DEVICE_BZ, + .features = IWL_TX_CSUM_NETIF_FLAGS_BZ | NETIF_F_RXCSUM, + .num_rbds = IWL_NUM_RBDS_AX210_HE, +}; + const struct iwl_cfg iwl_cfg_gl_a0_fm_a0 = { .fw_name_pre = IWL_GL_A_FM_A_FW_PRE, .uhb_supported = true, @@ -1017,6 +1042,14 @@ const struct iwl_cfg iwl_cfg_bnj_a0_gf_a0 = { .num_rbds = IWL_NUM_RBDS_AX210_HE, }; +const struct iwl_cfg iwl_cfg_bnj_b0_gf_a0 = { + .fw_name_pre = IWL_BNJ_B_GF_A_FW_PRE, + .uhb_supported = true, + IWL_DEVICE_BZ, + .features = IWL_TX_CSUM_NETIF_FLAGS | NETIF_F_RXCSUM, + .num_rbds = IWL_NUM_RBDS_AX210_HE, +}; + const struct iwl_cfg iwl_cfg_bnj_a0_gf4_a0 = { .fw_name_pre = IWL_BNJ_A_GF4_A_FW_PRE, .uhb_supported = true, @@ -1067,13 +1100,16 @@ MODULE_FIRMWARE(IWL_BZ_A_GF_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_BZ_A_GF4_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_BZ_A_MR_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_BZ_A_FM_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); +MODULE_FIRMWARE(IWL_BZ_A_FM_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_GL_A_FM_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_BNJ_A_FM_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_BNJ_A_FM4_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_BNJ_B_FM4_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_BNJ_A_GF_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); +MODULE_FIRMWARE(IWL_BNJ_B_GF_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_BNJ_A_GF4_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_BNJ_A_HR_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_BZ_A_FM4_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); +MODULE_FIRMWARE(IWL_BZ_A_FM4_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_GL_B_FM_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_BNJ_B_FM_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index eaa0ff2736c50..a4c7fd929cc2f 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -650,12 +650,15 @@ extern const struct iwl_cfg iwl_cfg_bz_a0_gf4_a0; extern const struct iwl_cfg iwl_cfg_bz_a0_mr_a0; extern const struct iwl_cfg iwl_cfg_bz_a0_fm_a0; extern const struct iwl_cfg iwl_cfg_bz_a0_fm4_a0; +extern const struct iwl_cfg iwl_cfg_bz_a0_fm_b0; +extern const struct iwl_cfg iwl_cfg_bz_a0_fm4_b0; extern const struct iwl_cfg iwl_cfg_gl_a0_fm_a0; extern const struct iwl_cfg iwl_cfg_gl_b0_fm_b0; extern const struct iwl_cfg iwl_cfg_bz_z0_gf_a0; extern const struct iwl_cfg iwl_cfg_bnj_a0_fm_a0; extern const struct iwl_cfg iwl_cfg_bnj_a0_fm4_a0; extern const struct iwl_cfg iwl_cfg_bnj_a0_gf_a0; +extern const struct iwl_cfg iwl_cfg_bnj_b0_gf_a0; extern const struct iwl_cfg iwl_cfg_bnj_a0_gf4_a0; extern const struct iwl_cfg iwl_cfg_bnj_a0_hr_b0; extern const struct iwl_cfg 
iwl_cfg_bnj_b0_fm_b0; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 8aa8a678475cb..077983913acd9 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1159,6 +1159,16 @@ static const struct iwl_dev_info iwl_dev_info_table[] = { IWL_CFG_RF_TYPE_FM, IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_CDB, IWL_CFG_NO_JACKET, iwl_cfg_bz_a0_fm4_a0, iwl_bz_name), + _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_MAC_TYPE_BZ, IWL_CFG_ANY, + IWL_CFG_RF_TYPE_FM, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB, IWL_CFG_IS_JACKET, + iwl_cfg_bz_a0_fm_b0, iwl_bz_name), + _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_MAC_TYPE_BZ, IWL_CFG_ANY, + IWL_CFG_RF_TYPE_FM, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_CDB, IWL_CFG_IS_JACKET, + iwl_cfg_bz_a0_fm4_b0, iwl_bz_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_GL, SILICON_A_STEP, IWL_CFG_RF_TYPE_FM, IWL_CFG_ANY, @@ -1203,6 +1213,11 @@ static const struct iwl_dev_info iwl_dev_info_table[] = { IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB, IWL_CFG_IS_JACKET, iwl_cfg_bnj_a0_gf_a0, iwl_bz_name), + _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_MAC_TYPE_GL, SILICON_B_STEP, + IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB, IWL_CFG_IS_JACKET, + iwl_cfg_bnj_b0_gf_a0, iwl_bz_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_GL, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, -- GitLab From 8f55564c05123b790f25562fa02ca2e1dbf9c4f5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 14 Mar 2023 19:49:34 +0200 Subject: [PATCH 0473/2078] wifi: iwlwifi: fw: pnvm: fix uefi reduced TX power loading There are a number of issues here: * if trans->reduce_power_loaded is already true, we call iwl_trans_set_reduce_power() with an uninitialized len value * in this case we also clobber a previous load/setting * if iwl_uefi_get_reduced_power() returns an ERR_PTR() we try to kfree() it Move the iwl_trans_set_reduce_power() call into the success case only to fix these issues. Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230314194113.aa2cf2281f5d.I33b4ab3427f1921c184c52fecd0f46781a89dc8a@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/fw/pnvm.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c index b6d3ac6ed4402..c6f2672fdc737 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright(c) 2020-2021 Intel Corporation + * Copyright(c) 2020-2022 Intel Corporation */ #include "iwl-drv.h" @@ -318,7 +318,6 @@ parse: kfree(data); skip_parse: - data = NULL; /* now try to get the reduce power table, if not loaded yet */ if (!trans->reduce_power_loaded) { data = iwl_uefi_get_reduced_power(trans, &len); @@ -329,19 +328,16 @@ skip_parse: * trying again over and over. 
*/ trans->reduce_power_loaded = true; - - goto skip_reduce_power; + } else { + ret = iwl_trans_set_reduce_power(trans, data, len); + if (ret) + IWL_DEBUG_FW(trans, + "Failed to set reduce power table %d\n", + ret); + kfree(data); } } - ret = iwl_trans_set_reduce_power(trans, data, len); - if (ret) - IWL_DEBUG_FW(trans, - "Failed to set reduce power table %d\n", - ret); - kfree(data); - -skip_reduce_power: iwl_init_notification_wait(notif_wait, &pnvm_wait, ntf_cmds, ARRAY_SIZE(ntf_cmds), iwl_pnvm_complete_fn, trans); -- GitLab From 4eca8cbf7ba83c3291b5841905ce64584036b1ff Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 Mar 2023 13:29:20 +0100 Subject: [PATCH 0474/2078] wifi: iwlwifi: suppress printf warnings in tracing This can't really be fixed due to the macro layout of tracepoints (you'd need a special tracepoint macro for when this is needed), so just suppress the warnings. Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c b/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c index 999b7c652289d..c190ec5effa11 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c @@ -12,6 +12,7 @@ #include "iwl-trans.h" #define CREATE_TRACE_POINTS +#pragma GCC diagnostic ignored "-Wsuggest-attribute=format" #include "iwl-devtrace.h" EXPORT_TRACEPOINT_SYMBOL(iwlwifi_dev_ucode_event); -- GitLab From 31bf1dbccfb0a9861d4846755096b3fff5687f8a Mon Sep 17 00:00:00 2001 From: Viktor Malik Date: Fri, 10 Mar 2023 08:40:59 +0100 Subject: [PATCH 0475/2078] bpf: Fix attaching fentry/fexit/fmod_ret/lsm to modules This resolves two problems with attachment of fentry/fexit/fmod_ret/lsm to functions located in modules: 1. The verifier tries to find the address to attach to in kallsyms. This is always done by searching the entire kallsyms, not respecting the module in which the function is located. Such approach causes an incorrect attachment address to be computed if the function to attach to is shadowed by a function of the same name located earlier in kallsyms. 2. If the address to attach to is located in a module, the module reference is only acquired in register_fentry. If the module is unloaded between the place where the address is found (bpf_check_attach_target in the verifier) and register_fentry, it is possible that another module is loaded to the same address which may lead to potential errors. Since the attachment must contain the BTF of the program to attach to, we extract the module from it and search for the function address in the correct module (resolving problem no. 1). Then, the module reference is taken directly in bpf_check_attach_target and stored in the bpf program (in bpf_prog_aux). The reference is only released when the program is unloaded (resolving problem no. 2). 
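In rough form, the resolution logic described above looks like the following sketch (simplified from the verifier change below, using only the helpers already named in this patch; error handling trimmed):

    /* resolve tname against the module that owns the BTF, not the whole kallsyms table */
    if (btf_is_module(btf)) {
            mod = btf_try_get_module(btf);          /* pin the owning module */
            addr = mod ? find_kallsyms_symbol_value(mod, tname) : 0;
    } else {
            addr = kallsyms_lookup_name(tname);     /* vmlinux symbol, global lookup is fine */
    }
    /* on success the reference is stored in prog->aux->mod and only dropped on program unload */

This way a module function that shares its name with an earlier entry in kallsyms can no longer resolve to the wrong address, and the module cannot be unloaded while a tracing program is still attached to it.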
Signed-off-by: Viktor Malik Acked-by: Jiri Olsa Reviewed-by: Luis Chamberlain Link: https://lore.kernel.org/r/3f6a9d8ae850532b5ef864ef16327b0f7a669063.1678432753.git.vmalik@redhat.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 ++ kernel/bpf/syscall.c | 6 ++++++ kernel/bpf/trampoline.c | 28 ---------------------------- kernel/bpf/verifier.c | 18 +++++++++++++++++- kernel/module/internal.h | 5 +++++ 5 files changed, 30 insertions(+), 29 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 71cc92a4ba487..3ef98fb92987d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1103,6 +1103,7 @@ struct bpf_trampoline { struct bpf_attach_target_info { struct btf_func_model fmodel; long tgt_addr; + struct module *tgt_mod; const char *tgt_name; const struct btf_type *tgt_type; }; @@ -1406,6 +1407,7 @@ struct bpf_prog_aux { * main prog always has linfo_idx == 0 */ u32 linfo_idx; + struct module *mod; u32 num_exentries; struct exception_table_entry *extable; union { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 5b88301a2ae02..099e9068bcdd8 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2067,6 +2067,7 @@ static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred) { bpf_prog_kallsyms_del_all(prog); btf_put(prog->aux->btf); + module_put(prog->aux->mod); kvfree(prog->aux->jited_linfo); kvfree(prog->aux->linfo); kfree(prog->aux->kfunc_tab); @@ -3113,6 +3114,11 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog, if (err) goto out_unlock; + if (tgt_info.tgt_mod) { + module_put(prog->aux->mod); + prog->aux->mod = tgt_info.tgt_mod; + } + tr = bpf_trampoline_get(key, &tgt_info); if (!tr) { err = -ENOMEM; diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index d0ed7d6f5eec5..f61d5138b12b9 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -172,26 +171,6 @@ out: return tr; } -static int bpf_trampoline_module_get(struct bpf_trampoline *tr) -{ - struct module *mod; - int err = 0; - - preempt_disable(); - mod = __module_text_address((unsigned long) tr->func.addr); - if (mod && !try_module_get(mod)) - err = -ENOENT; - preempt_enable(); - tr->mod = mod; - return err; -} - -static void bpf_trampoline_module_put(struct bpf_trampoline *tr) -{ - module_put(tr->mod); - tr->mod = NULL; -} - static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr) { void *ip = tr->func.addr; @@ -202,8 +181,6 @@ static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr) else ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL); - if (!ret) - bpf_trampoline_module_put(tr); return ret; } @@ -238,9 +215,6 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr) tr->func.ftrace_managed = true; } - if (bpf_trampoline_module_get(tr)) - return -ENOENT; - if (tr->func.ftrace_managed) { ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 1); ret = register_ftrace_direct_multi(tr->fops, (long)new_addr); @@ -248,8 +222,6 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr) ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr); } - if (ret) - bpf_trampoline_module_put(tr); return ret; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2bbd89279070a..60793f793ca6e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -24,6 +24,7 @@ #include #include #include +#include "../module/internal.h" #include "disasm.h" @@ -18307,6 +18308,7 @@ int 
bpf_check_attach_target(struct bpf_verifier_log *log, const char *tname; struct btf *btf; long addr = 0; + struct module *mod = NULL; if (!btf_id) { bpf_log(log, "Tracing programs must provide btf_id\n"); @@ -18480,8 +18482,17 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, else addr = (long) tgt_prog->aux->func[subprog]->bpf_func; } else { - addr = kallsyms_lookup_name(tname); + if (btf_is_module(btf)) { + mod = btf_try_get_module(btf); + if (mod) + addr = find_kallsyms_symbol_value(mod, tname); + else + addr = 0; + } else { + addr = kallsyms_lookup_name(tname); + } if (!addr) { + module_put(mod); bpf_log(log, "The address of function %s cannot be found\n", tname); @@ -18521,11 +18532,13 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, break; } if (ret) { + module_put(mod); bpf_log(log, "%s is not sleepable\n", tname); return ret; } } else if (prog->expected_attach_type == BPF_MODIFY_RETURN) { if (tgt_prog) { + module_put(mod); bpf_log(log, "can't modify return codes of BPF programs\n"); return -EINVAL; } @@ -18534,6 +18547,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, !check_attach_modify_return(addr, tname)) ret = 0; if (ret) { + module_put(mod); bpf_log(log, "%s() is not modifiable\n", tname); return ret; } @@ -18544,6 +18558,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, tgt_info->tgt_addr = addr; tgt_info->tgt_name = tname; tgt_info->tgt_type = t; + tgt_info->tgt_mod = mod; return 0; } @@ -18623,6 +18638,7 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) /* store info about the attachment target that will be used later */ prog->aux->attach_func_proto = tgt_info.tgt_type; prog->aux->attach_func_name = tgt_info.tgt_name; + prog->aux->mod = tgt_info.tgt_mod; if (tgt_prog) { prog->aux->saved_dst_prog_type = tgt_prog->type; diff --git a/kernel/module/internal.h b/kernel/module/internal.h index 2e2bf236f5582..5c9170f9135ce 100644 --- a/kernel/module/internal.h +++ b/kernel/module/internal.h @@ -256,6 +256,11 @@ static inline bool sect_empty(const Elf_Shdr *sect) static inline void init_build_id(struct module *mod, const struct load_info *info) { } static inline void layout_symtab(struct module *mod, struct load_info *info) { } static inline void add_kallsyms(struct module *mod, const struct load_info *info) { } +static inline unsigned long find_kallsyms_symbol_value(struct module *mod, + const char *name) +{ + return 0; +} #endif /* CONFIG_KALLSYMS */ #ifdef CONFIG_SYSFS -- GitLab From aa3d65de4b9004d799f97700751a86d3ebd7d5f9 Mon Sep 17 00:00:00 2001 From: Viktor Malik Date: Fri, 10 Mar 2023 08:41:00 +0100 Subject: [PATCH 0476/2078] bpf/selftests: Test fentry attachment to shadowed functions Adds a new test that tries to attach a program to fentry of two functions of the same name, one located in vmlinux and the other in bpf_testmod. To avoid conflicts with existing tests, a new function "bpf_fentry_shadow_test" was created both in vmlinux and in bpf_testmod. The previous commit fixed a bug which caused this test to fail. The verifier would always use the vmlinux function's address as the target trampoline address, hence trying to create two trampolines for a single address, which is forbidden. The test (similarly to other fentry/fexit tests) is not working on arm64 at the moment. 
Signed-off-by: Viktor Malik Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/5fe2f364190b6f79b085066ed7c5989c5bc475fa.1678432753.git.vmalik@redhat.com Signed-off-by: Alexei Starovoitov --- net/bpf/test_run.c | 5 + tools/testing/selftests/bpf/DENYLIST.aarch64 | 1 + .../selftests/bpf/bpf_testmod/bpf_testmod.c | 6 + .../bpf/prog_tests/module_fentry_shadow.c | 128 ++++++++++++++++++ 4 files changed, 140 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/module_fentry_shadow.c diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 6a8b33a103a40..71226f68270d9 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -560,6 +560,11 @@ long noinline bpf_kfunc_call_test4(signed char a, short b, int c, long d) return (long)a + (long)b + (long)c + d; } +int noinline bpf_fentry_shadow_test(int a) +{ + return a + 1; +} + struct prog_test_member1 { int a; }; diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64 index 99cc33c51eaac..0a6837f97c324 100644 --- a/tools/testing/selftests/bpf/DENYLIST.aarch64 +++ b/tools/testing/selftests/bpf/DENYLIST.aarch64 @@ -44,6 +44,7 @@ lookup_key # test_lookup_key__attach unexp lru_bug # lru_bug__attach unexpected error: -524 (errno 524) modify_return # modify_return__attach failed unexpected error: -524 (errno 524) module_attach # skel_attach skeleton attach failed: -524 +module_fentry_shadow # bpf_link_create unexpected bpf_link_create: actual -524 < expected 0 mptcp/base # run_test mptcp unexpected error: -524 (errno 524) netcnt # packets unexpected packets: actual 10001 != expected 10000 rcu_read_lock # failed to attach: ERROR: strerror_r(-524)=22 diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 5e6e85c8d77de..7999476b9446c 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -268,6 +268,12 @@ static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = { .set = &bpf_testmod_check_kfunc_ids, }; +noinline int bpf_fentry_shadow_test(int a) +{ + return a + 2; +} +EXPORT_SYMBOL_GPL(bpf_fentry_shadow_test); + extern int bpf_fentry_test1(int a); static int bpf_testmod_init(void) diff --git a/tools/testing/selftests/bpf/prog_tests/module_fentry_shadow.c b/tools/testing/selftests/bpf/prog_tests/module_fentry_shadow.c new file mode 100644 index 0000000000000..c7636e18b1ebd --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/module_fentry_shadow.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Red Hat */ +#include +#include +#include "bpf/libbpf_internal.h" +#include "cgroup_helpers.h" + +static const char *module_name = "bpf_testmod"; +static const char *symbol_name = "bpf_fentry_shadow_test"; + +static int get_bpf_testmod_btf_fd(void) +{ + struct bpf_btf_info info; + char name[64]; + __u32 id = 0, len; + int err, fd; + + while (true) { + err = bpf_btf_get_next_id(id, &id); + if (err) { + log_err("failed to iterate BTF objects"); + return err; + } + + fd = bpf_btf_get_fd_by_id(id); + if (fd < 0) { + if (errno == ENOENT) + continue; /* expected race: BTF was unloaded */ + err = -errno; + log_err("failed to get FD for BTF object #%d", id); + return err; + } + + len = sizeof(info); + memset(&info, 0, sizeof(info)); + info.name = ptr_to_u64(name); + info.name_len = sizeof(name); + + err = bpf_obj_get_info_by_fd(fd, &info, &len); + if (err) { + err = -errno; + log_err("failed to get info for BTF object 
#%d", id); + close(fd); + return err; + } + + if (strcmp(name, module_name) == 0) + return fd; + + close(fd); + } + return -ENOENT; +} + +void test_module_fentry_shadow(void) +{ + struct btf *vmlinux_btf = NULL, *mod_btf = NULL; + int err, i; + int btf_fd[2] = {}; + int prog_fd[2] = {}; + int link_fd[2] = {}; + __s32 btf_id[2] = {}; + + LIBBPF_OPTS(bpf_prog_load_opts, load_opts, + .expected_attach_type = BPF_TRACE_FENTRY, + ); + + const struct bpf_insn trace_program[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + + vmlinux_btf = btf__load_vmlinux_btf(); + if (!ASSERT_OK_PTR(vmlinux_btf, "load_vmlinux_btf")) + return; + + btf_fd[1] = get_bpf_testmod_btf_fd(); + if (!ASSERT_GE(btf_fd[1], 0, "get_bpf_testmod_btf_fd")) + goto out; + + mod_btf = btf_get_from_fd(btf_fd[1], vmlinux_btf); + if (!ASSERT_OK_PTR(mod_btf, "btf_get_from_fd")) + goto out; + + btf_id[0] = btf__find_by_name_kind(vmlinux_btf, symbol_name, BTF_KIND_FUNC); + if (!ASSERT_GT(btf_id[0], 0, "btf_find_by_name")) + goto out; + + btf_id[1] = btf__find_by_name_kind(mod_btf, symbol_name, BTF_KIND_FUNC); + if (!ASSERT_GT(btf_id[1], 0, "btf_find_by_name")) + goto out; + + for (i = 0; i < 2; i++) { + load_opts.attach_btf_id = btf_id[i]; + load_opts.attach_btf_obj_fd = btf_fd[i]; + prog_fd[i] = bpf_prog_load(BPF_PROG_TYPE_TRACING, NULL, "GPL", + trace_program, + sizeof(trace_program) / sizeof(struct bpf_insn), + &load_opts); + if (!ASSERT_GE(prog_fd[i], 0, "bpf_prog_load")) + goto out; + + /* If the verifier incorrectly resolves addresses of the + * shadowed functions and uses the same address for both the + * vmlinux and the bpf_testmod functions, this will fail on + * attempting to create two trampolines for the same address, + * which is forbidden. + */ + link_fd[i] = bpf_link_create(prog_fd[i], 0, BPF_TRACE_FENTRY, NULL); + if (!ASSERT_GE(link_fd[i], 0, "bpf_link_create")) + goto out; + } + + err = bpf_prog_test_run_opts(prog_fd[0], NULL); + ASSERT_OK(err, "running test"); + +out: + btf__free(vmlinux_btf); + btf__free(mod_btf); + for (i = 0; i < 2; i++) { + if (btf_fd[i]) + close(btf_fd[i]); + if (prog_fd[i] > 0) + close(prog_fd[i]); + if (link_fd[i] > 0) + close(link_fd[i]); + } +} -- GitLab From 543c143dac5d64a8de8db3ccadea6d92c34fd1e6 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 12 Mar 2023 14:26:37 +0100 Subject: [PATCH 0477/2078] ptp: ines: drop of_match_ptr for ID table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver can match only via the DT table so the table should be always used and the of_match_ptr does not have any sense (this also allows ACPI matching via PRP0001, even though it might not be relevant here). 
This also fixes !CONFIG_OF error: drivers/ptp/ptp_ines.c:783:34: error: ‘ines_ptp_ctrl_of_match’ defined but not used [-Werror=unused-const-variable=] Signed-off-by: Krzysztof Kozlowski Reviewed-by: Simon Horman Acked-by: Richard Cochran Link: https://lore.kernel.org/r/20230312132637.352755-1-krzysztof.kozlowski@linaro.org Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_ines.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ptp/ptp_ines.c b/drivers/ptp/ptp_ines.c index 61f47fb9d9979..ed215b4581830 100644 --- a/drivers/ptp/ptp_ines.c +++ b/drivers/ptp/ptp_ines.c @@ -792,7 +792,7 @@ static struct platform_driver ines_ptp_ctrl_driver = { .remove = ines_ptp_ctrl_remove, .driver = { .name = "ines_ptp_ctrl", - .of_match_table = of_match_ptr(ines_ptp_ctrl_of_match), + .of_match_table = ines_ptp_ctrl_of_match, }, }; module_platform_driver(ines_ptp_ctrl_driver); -- GitLab From 83456576a42050cc0675aece14277a3cbdee9cbe Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 14 Mar 2023 13:48:56 +0100 Subject: [PATCH 0478/2078] net: phy: update obsolete comment about PHY_STARTING Commit 899a3cbbf77a ("net: phy: remove states PHY_STARTING and PHY_PENDING") missed to update a comment in phy_probe. Remove superfluous "Description:" prefix while we are here. Signed-off-by: Wolfram Sang Reviewed-by: Heiner Kallweit Link: https://lore.kernel.org/r/20230314124856.44878-1-wsa+renesas@sang-engineering.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy_device.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 1785f1cead971..c0760cbf534bc 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -3076,9 +3076,7 @@ EXPORT_SYMBOL_GPL(fwnode_get_phy_node); * phy_probe - probe and init a PHY device * @dev: device to probe and init * - * Description: Take care of setting up the phy_device structure, - * set the state to READY (the driver's init function should - * set it to STARTING if needed). + * Take care of setting up the phy_device structure, set the state to READY. */ static int phy_probe(struct device *dev) { -- GitLab From a57cc54d69d601282c4451010b12f53630834203 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 14 Mar 2023 13:49:27 +0100 Subject: [PATCH 0479/2078] net: phy: micrel: drop superfluous use of temp variable 'temp' was used before commit c0c99d0cd107 ("net: phy: micrel: remove the use of .ack_interrupt()") refactored the code. Now, we can simplify it a little. 
Signed-off-by: Wolfram Sang Reviewed-by: Andrew Lunn Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20230314124928.44948-1-wsa+renesas@sang-engineering.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/micrel.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index e5b2af69ac033..0cf0ef3a96a36 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -436,11 +436,9 @@ static int kszphy_config_intr(struct phy_device *phydev) if (err) return err; - temp = KSZPHY_INTCS_ALL; - err = phy_write(phydev, MII_KSZPHY_INTCS, temp); + err = phy_write(phydev, MII_KSZPHY_INTCS, KSZPHY_INTCS_ALL); } else { - temp = 0; - err = phy_write(phydev, MII_KSZPHY_INTCS, temp); + err = phy_write(phydev, MII_KSZPHY_INTCS, 0); if (err) return err; -- GitLab From c05d145abea138dbba213ce775820af73e893d92 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 13 Mar 2023 22:42:20 -0700 Subject: [PATCH 0480/2078] net/mlx5: remove redundant clear_bit When shutdown or remove callbacks are called the driver sets the flag MLX5_BREAK_FW_WAIT, to stop waiting for FW as teardown was called. There is no need to clear the bit as once shutdown or remove were called as there is no way back, the driver is going down. Furthermore, if not cleared the flag can be used also in other loops where we may wait while teardown was already called. Use test_bit() instead of test_and_clear_bit() as there is no need to clear the flag. Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20230314054234.267365-2-saeed@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 540840e80493b..0ff0eb6604950 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -191,7 +191,7 @@ static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili, if (!(fw_initializing >> 31)) break; if (time_after(jiffies, end) || - test_and_clear_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state)) { + test_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state)) { err = -EBUSY; break; } -- GitLab From 8ff38e730c3f5ee717f25365ef8aa4739562d567 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 13 Mar 2023 22:42:21 -0700 Subject: [PATCH 0481/2078] net/mlx5: Stop waiting for PCI up if teardown was triggered If driver teardown is called while PCI is turned off, there is a race between health recovery and teardown. If health recovery already started it will wait 60 sec trying to see if PCI gets back and it can recover, but actually there is no need to wait anymore once teardown was called. Use the MLX5_BREAK_FW_WAIT flag which is set on driver teardown to break waiting for PCI up. 
Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20230314054234.267365-3-saeed@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index f9438d4e43caf..016c5f99c4706 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -325,6 +325,10 @@ int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev) while (sensor_pci_not_working(dev)) { if (time_after(jiffies, end)) return -ETIMEDOUT; + if (test_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state)) { + mlx5_core_warn(dev, "device is being removed, stop waiting for PCI\n"); + return -ENODEV; + } msleep(100); } return 0; -- GitLab From ceefcfb8a37510f2c2b99b9e8d6c0b3941687b2b Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 13 Mar 2023 22:42:22 -0700 Subject: [PATCH 0482/2078] net/mlx5: Add comment to mlx5_devlink_params_register() Add comment to mlx5_devlink_params_register() functions so it is clear that only driver init params should be registered here. Signed-off-by: Jiri Pirko Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20230314054234.267365-4-saeed@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index c5d2fdcabd566..b7784e02c2ddd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -805,6 +805,11 @@ int mlx5_devlink_params_register(struct devlink *devlink) { int err; + /* Here only the driver init params should be registered. + * Runtime params should be registered by the code which + * behaviour they configure. + */ + err = devl_params_register(devlink, mlx5_devlink_params, ARRAY_SIZE(mlx5_devlink_params)); if (err) -- GitLab From c1fef618d611b31964ab397aa0bf0611da94bade Mon Sep 17 00:00:00 2001 From: Sandipan Patra Date: Mon, 13 Mar 2023 22:42:23 -0700 Subject: [PATCH 0483/2078] net/mlx5: Implement thermal zone Implement thermal zone support for mlx5 based HW. The NIC uses temperature sensor provided by ASIC to report current temperature to thermal core. 
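Once registered, the sensor is exposed through the standard thermal core sysfs interface. A hypothetical reading (the zone index and temperature value are illustrative; the zone name "mlx5" and the scaling come from the code added below):

	$ cat /sys/class/thermal/thermal_zone0/type
	mlx5
	$ cat /sys/class/thermal/thermal_zone0/temp
	45000

The MTMP register reports the temperature in units of 0.125 degrees Celsius, so the driver multiplies by 125 to produce the millidegree value the thermal framework expects.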
Signed-off-by: Sandipan Patra Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20230314054234.267365-5-saeed@kernel.org Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/Makefile | 1 + .../net/ethernet/mellanox/mlx5/core/main.c | 6 + .../net/ethernet/mellanox/mlx5/core/thermal.c | 108 ++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/thermal.h | 20 ++++ include/linux/mlx5/driver.h | 3 + include/linux/mlx5/mlx5_ifc.h | 26 +++++ 6 files changed, 164 insertions(+) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/thermal.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/thermal.h diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 8d4e25cc54ea3..6c2f1d4a58ab2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -77,6 +77,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += esw/acl/helper.o \ mlx5_core-$(CONFIG_MLX5_BRIDGE) += esw/bridge.o en/rep/bridge.o +mlx5_core-$(CONFIG_THERMAL) += thermal.o mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 0ff0eb6604950..644c889f9a32b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -52,6 +52,7 @@ #include #include #include "mlx5_core.h" +#include "thermal.h" #include "lib/eq.h" #include "fs_core.h" #include "lib/mpfs.h" @@ -1768,6 +1769,10 @@ static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id) if (err) dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err); + err = mlx5_thermal_init(dev); + if (err) + dev_err(&pdev->dev, "mlx5_thermal_init failed with error code %d\n", err); + pci_save_state(pdev); devlink_register(devlink); return 0; @@ -1796,6 +1801,7 @@ static void remove_one(struct pci_dev *pdev) set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state); devlink_unregister(devlink); mlx5_sriov_disable(pdev); + mlx5_thermal_uninit(dev); mlx5_crdump_disable(dev); mlx5_drain_health_wq(dev); mlx5_uninit_one(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/thermal.c b/drivers/net/ethernet/mellanox/mlx5/core/thermal.c new file mode 100644 index 0000000000000..e47fa6fb836f1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/thermal.c @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. 
+ +#include +#include +#include +#include +#include +#include +#include "mlx5_core.h" +#include "thermal.h" + +#define MLX5_THERMAL_POLL_INT_MSEC 1000 +#define MLX5_THERMAL_NUM_TRIPS 0 +#define MLX5_THERMAL_ASIC_SENSOR_INDEX 0 + +/* Bit string indicating the writeablility of trip points if any */ +#define MLX5_THERMAL_TRIP_MASK (BIT(MLX5_THERMAL_NUM_TRIPS) - 1) + +struct mlx5_thermal { + struct mlx5_core_dev *mdev; + struct thermal_zone_device *tzdev; +}; + +static int mlx5_thermal_get_mtmp_temp(struct mlx5_core_dev *mdev, u32 id, int *p_temp) +{ + u32 mtmp_out[MLX5_ST_SZ_DW(mtmp_reg)] = {}; + u32 mtmp_in[MLX5_ST_SZ_DW(mtmp_reg)] = {}; + int err; + + MLX5_SET(mtmp_reg, mtmp_in, sensor_index, id); + + err = mlx5_core_access_reg(mdev, mtmp_in, sizeof(mtmp_in), + mtmp_out, sizeof(mtmp_out), + MLX5_REG_MTMP, 0, 0); + + if (err) + return err; + + *p_temp = MLX5_GET(mtmp_reg, mtmp_out, temperature); + + return 0; +} + +static int mlx5_thermal_get_temp(struct thermal_zone_device *tzdev, + int *p_temp) +{ + struct mlx5_thermal *thermal = tzdev->devdata; + struct mlx5_core_dev *mdev = thermal->mdev; + int err; + + err = mlx5_thermal_get_mtmp_temp(mdev, MLX5_THERMAL_ASIC_SENSOR_INDEX, p_temp); + + if (err) + return err; + + /* The unit of temp returned is in 0.125 C. The thermal + * framework expects the value in 0.001 C. + */ + *p_temp *= 125; + + return 0; +} + +static struct thermal_zone_device_ops mlx5_thermal_ops = { + .get_temp = mlx5_thermal_get_temp, +}; + +int mlx5_thermal_init(struct mlx5_core_dev *mdev) +{ + struct mlx5_thermal *thermal; + struct thermal_zone_device *tzd; + const char *data = "mlx5"; + + tzd = thermal_zone_get_zone_by_name(data); + if (!IS_ERR(tzd)) + return 0; + + thermal = kzalloc(sizeof(*thermal), GFP_KERNEL); + if (!thermal) + return -ENOMEM; + + thermal->mdev = mdev; + thermal->tzdev = thermal_zone_device_register(data, + MLX5_THERMAL_NUM_TRIPS, + MLX5_THERMAL_TRIP_MASK, + thermal, + &mlx5_thermal_ops, + NULL, 0, MLX5_THERMAL_POLL_INT_MSEC); + if (IS_ERR(thermal->tzdev)) { + dev_err(mdev->device, "Failed to register thermal zone device (%s) %ld\n", + data, PTR_ERR(thermal->tzdev)); + kfree(thermal); + return -EINVAL; + } + + mdev->thermal = thermal; + return 0; +} + +void mlx5_thermal_uninit(struct mlx5_core_dev *mdev) +{ + if (!mdev->thermal) + return; + + thermal_zone_device_unregister(mdev->thermal->tzdev); + kfree(mdev->thermal); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/thermal.h b/drivers/net/ethernet/mellanox/mlx5/core/thermal.h new file mode 100644 index 0000000000000..7d752c1221922 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/thermal.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB + * Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. 
+ */ +#ifndef __MLX5_THERMAL_DRIVER_H +#define __MLX5_THERMAL_DRIVER_H + +#if IS_ENABLED(CONFIG_THERMAL) +int mlx5_thermal_init(struct mlx5_core_dev *mdev); +void mlx5_thermal_uninit(struct mlx5_core_dev *mdev); +#else +static inline int mlx5_thermal_init(struct mlx5_core_dev *mdev) +{ + mdev->thermal = NULL; + return 0; +} + +static inline void mlx5_thermal_uninit(struct mlx5_core_dev *mdev) { } +#endif + +#endif /* __MLX5_THERMAL_DRIVER_H */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f33389b42209e..7a898113b6b7d 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -134,6 +134,7 @@ enum { MLX5_REG_PCAM = 0x507f, MLX5_REG_NODE_DESC = 0x6001, MLX5_REG_HOST_ENDIANNESS = 0x7004, + MLX5_REG_MTMP = 0x900A, MLX5_REG_MCIA = 0x9014, MLX5_REG_MFRL = 0x9028, MLX5_REG_MLCR = 0x902b, @@ -731,6 +732,7 @@ struct mlx5_fw_tracer; struct mlx5_vxlan; struct mlx5_geneve; struct mlx5_hv_vhca; +struct mlx5_thermal; #define MLX5_LOG_SW_ICM_BLOCK_SIZE(dev) (MLX5_CAP_DEV_MEM(dev, log_sw_icm_alloc_granularity)) #define MLX5_SW_ICM_BLOCK_SIZE(dev) (1 << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)) @@ -808,6 +810,7 @@ struct mlx5_core_dev { struct mlx5_rsc_dump *rsc_dump; u32 vsc_addr; struct mlx5_hv_vhca *hv_vhca; + struct mlx5_thermal *thermal; }; struct mlx5_db { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 66d76e97a0876..d2c164f0778cd 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -10869,6 +10869,31 @@ struct mlx5_ifc_mrtc_reg_bits { u8 time_l[0x20]; }; +struct mlx5_ifc_mtmp_reg_bits { + u8 reserved_at_0[0x14]; + u8 sensor_index[0xc]; + + u8 reserved_at_20[0x10]; + u8 temperature[0x10]; + + u8 mte[0x1]; + u8 mtr[0x1]; + u8 reserved_at_42[0xe]; + u8 max_temperature[0x10]; + + u8 tee[0x2]; + u8 reserved_at_62[0xe]; + u8 temp_threshold_hi[0x10]; + + u8 reserved_at_80[0x10]; + u8 temp_threshold_lo[0x10]; + + u8 reserved_at_a0[0x20]; + + u8 sensor_name_hi[0x20]; + u8 sensor_name_lo[0x20]; +}; + union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_bufferx_reg_bits bufferx_reg; struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits eth_2819_cntrs_grp_data_layout; @@ -10931,6 +10956,7 @@ union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_mfrl_reg_bits mfrl_reg; struct mlx5_ifc_mtutc_reg_bits mtutc_reg; struct mlx5_ifc_mrtc_reg_bits mrtc_reg; + struct mlx5_ifc_mtmp_reg_bits mtmp_reg; u8 reserved_at_0[0x60e0]; }; -- GitLab From aa98d15ea40b9d370871df7882b6d56d08aed692 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Mon, 13 Mar 2023 22:42:24 -0700 Subject: [PATCH 0484/2078] net/mlx5e: Utilize the entire fifo Previous check was comparing against the fifo mask. The mask is size of the fifo (power of two) minus one, so a less than or equal comparator should be used for checking if the fifo has room for the SKB. 
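A worked example, assuming a fifo of size 8 (so mask = 7): the occupancy (u16)(*fifo->pc - *fifo->cc) ranges from 0 to 8. With the previous "< mask" comparison, pushing was refused once occupancy reached 7, so one of the 8 slots could never be used; with "<= mask", a push is still allowed at occupancy 7, letting the fifo fill all 8 entries.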
Signed-off-by: Rahul Rameshbabu Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20230314054234.267365-6-saeed@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index b9c2f67d37941..816ea83e6413d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -86,7 +86,7 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq); static inline bool mlx5e_skb_fifo_has_room(struct mlx5e_skb_fifo *fifo) { - return (u16)(*fifo->pc - *fifo->cc) < fifo->mask; + return (u16)(*fifo->pc - *fifo->cc) <= fifo->mask; } static inline bool -- GitLab From 2b5bd5b1611b817970f13b1c4dfdcda0b35e3690 Mon Sep 17 00:00:00 2001 From: Adham Faris Date: Mon, 13 Mar 2023 22:42:25 -0700 Subject: [PATCH 0485/2078] net/mlx5e: Rename RQ/SQ adaptive moderation state flag Dynamic interrupt moderation RQ and SQ feature represented by MLX5E_RQ_STATE_AM and MLX5E_SQ_STATE_AM enums respectively, is not consistent with the feature naming in the driver, and with the formal feature and library names. Hence, change MLX5E_RQ_STATE_AM and MLX5E_SQ_STATE_AM enum type names in core/en.h to MLX5E_RQ_STATE_DIM and MLX5E_SQ_STATE_DIM respectively. Signed-off-by: Adham Faris Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20230314054234.267365-7-saeed@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 88460b7796e55..66bca3a6a0579 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -339,7 +339,7 @@ static inline u8 mlx5e_get_dcb_num_tc(struct mlx5e_params *params) enum { MLX5E_RQ_STATE_ENABLED, MLX5E_RQ_STATE_RECOVERING, - MLX5E_RQ_STATE_AM, + MLX5E_RQ_STATE_DIM, MLX5E_RQ_STATE_NO_CSUM_COMPLETE, MLX5E_RQ_STATE_CSUM_FULL, /* cqe_csum_full hw bit is set */ MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, /* set when mini_cqe_resp_stride_index cap is used */ @@ -390,7 +390,7 @@ enum { MLX5E_SQ_STATE_MPWQE, MLX5E_SQ_STATE_RECOVERING, MLX5E_SQ_STATE_IPSEC, - MLX5E_SQ_STATE_AM, + MLX5E_SQ_STATE_DIM, MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, MLX5E_SQ_STATE_PENDING_XSK_TX, MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 76a9c5194a704..5ca9fcf845864 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1188,7 +1188,7 @@ int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param, __set_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state); if (params->rx_dim_enabled) - __set_bit(MLX5E_RQ_STATE_AM, &rq->state); + __set_bit(MLX5E_RQ_STATE_DIM, &rq->state); /* We disable csum_complete when XDP is enabled since * XDP programs might manipulate packets which will render @@ -1664,7 +1664,7 @@ int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix, mlx5e_set_sq_maxrate(c->netdev, sq, tx_rate); if (params->tx_dim_enabled) - sq->state |= BIT(MLX5E_SQ_STATE_AM); + 
sq->state |= BIT(MLX5E_SQ_STATE_DIM); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 9a458a5d98539..a50bfda18e96d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -51,7 +51,7 @@ static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq) struct mlx5e_sq_stats *stats = sq->stats; struct dim_sample dim_sample = {}; - if (unlikely(!test_bit(MLX5E_SQ_STATE_AM, &sq->state))) + if (unlikely(!test_bit(MLX5E_SQ_STATE_DIM, &sq->state))) return; dim_update_sample(sq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample); @@ -63,7 +63,7 @@ static void mlx5e_handle_rx_dim(struct mlx5e_rq *rq) struct mlx5e_rq_stats *stats = rq->stats; struct dim_sample dim_sample = {}; - if (unlikely(!test_bit(MLX5E_RQ_STATE_AM, &rq->state))) + if (unlikely(!test_bit(MLX5E_RQ_STATE_DIM, &rq->state))) return; dim_update_sample(rq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample); -- GitLab From 1fe7bc109e3e18f7776ba71953cbc922b7ee9165 Mon Sep 17 00:00:00 2001 From: Adham Faris Date: Mon, 13 Mar 2023 22:42:26 -0700 Subject: [PATCH 0486/2078] net/mlx5e: Stringify RQ SW state in RQ devlink health diagnostics One of the parameters that is retrieved/printed as a response to devlink health diagnostics for rx reporter is the RQ SW state. It's printed as a bitmap decimal number. Printing it as bitmap is problematic and non informative. In addition User can't count on SW state without accessing the kernel sources (mlx5e rq state enum in en.h). This patch prints RQ SW state in a textual representation, as a key: value pairs, where disabled rq states will appear as '0' and enabled ones will appear as '1'. See below the generated output for rx health diagnostics devlink command: $ devlink health diagnose auxiliary/mlx5_core.eth.0/65535 reporter rx Before: ======================================================================= Common config: RQ: type: 2 stride size: 2048 size: 8 ts_format: FRC CQ: stride size: 64 size: 1024 RQs: channel ix: 0 rqn: 4172 HW state: 1 SW state: 37 WQE counter: 7 posted WQEs: 7 cc: 7 CQ: cqn: 1033 HW status: 0 ci: 0 size: 1024 EQ: eqn: 7 irqn: 32 vecidx: 0 ci: 2 size: 2048 ICOSQ: sqn: 4169 HW state: 1 cc: 74 pc: 74 WQE size: 128 CQ: cqn: 1030 cc: 1 size: 128 channel ix: 1 ... . . After: ======================================================================= Common config: RQ: type: 2 stride size: 2048 size: 8 ts_format: FRC CQ: stride size: 64 size: 1024 RQs: channel ix: 0 rqn: 4172 HW state: 1 WQE counter: 7 posted WQEs: 7 cc: 7 SW State: enabled: 1 recovering: 0 am: 1 no_csum_complete: 0 csum_full: 0 mini_cqe_hw_stridx: 1 shampo: 0 mini_cqe_enhanced: 0 CQ: cqn: 1033 HW status: 0 ci: 0 size: 1024 EQ: eqn: 7 irqn: 32 vecidx: 0 ci: 2 size: 2048 ICOSQ: sqn: 4169 HW state: 1 cc: 74 pc: 74 WQE size: 128 CQ: cqn: 1030 cc: 1 size: 128 channel: ix: 1 ... . . 
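As a worked example of why the old format was hard to use: "SW state: 37" above is the bitmap 0b100101, i.e. bits 0, 2 and 5 of the MLX5E_RQ_STATE_* enum, which map to enabled, dim (formerly am) and mini_cqe_hw_stridx. These are exactly the three fields reported as '1' in the new output, but with the decimal form they are recoverable only by cross-referencing en.h in the kernel sources.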
Signed-off-by: Adham Faris Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20230314054234.267365-8-saeed@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 6 ++- .../mellanox/mlx5/core/en/reporter_rx.c | 49 +++++++++++++++++-- 2 files changed, 50 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 66bca3a6a0579..6c01da3bad745 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -336,8 +336,11 @@ static inline u8 mlx5e_get_dcb_num_tc(struct mlx5e_params *params) params->mqprio.num_tc : 1; } +/* Keep this enum consistent with the corresponding strings array + * declared in en/reporter_rx.c + */ enum { - MLX5E_RQ_STATE_ENABLED, + MLX5E_RQ_STATE_ENABLED = 0, MLX5E_RQ_STATE_RECOVERING, MLX5E_RQ_STATE_DIM, MLX5E_RQ_STATE_NO_CSUM_COMPLETE, @@ -345,6 +348,7 @@ enum { MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, /* set when mini_cqe_resp_stride_index cap is used */ MLX5E_RQ_STATE_SHAMPO, /* set when SHAMPO cap is used */ MLX5E_RQ_STATE_MINI_CQE_ENHANCED, /* set when enhanced mini_cqe_cap is used */ + MLX5E_NUM_RQ_STATES, /* Must be kept last */ }; struct mlx5e_cq { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index c462fe76495b5..98c87b3df806d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -8,6 +8,18 @@ #include "ptp.h" #include "lib/tout.h" +/* Keep this string array consistent with the MLX5E_RQ_STATE_* enums in en.h */ +static const char * const rq_sw_state_type_name[] = { + [MLX5E_RQ_STATE_ENABLED] = "enabled", + [MLX5E_RQ_STATE_RECOVERING] = "recovering", + [MLX5E_RQ_STATE_DIM] = "dim", + [MLX5E_RQ_STATE_NO_CSUM_COMPLETE] = "no_csum_complete", + [MLX5E_RQ_STATE_CSUM_FULL] = "csum_full", + [MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX] = "mini_cqe_hw_stridx", + [MLX5E_RQ_STATE_SHAMPO] = "shampo", + [MLX5E_RQ_STATE_MINI_CQE_ENHANCED] = "mini_cqe_enhanced", +}; + static int mlx5e_query_rq_state(struct mlx5_core_dev *dev, u32 rqn, u8 *state) { int outlen = MLX5_ST_SZ_BYTES(query_rq_out); @@ -239,6 +251,35 @@ static int mlx5e_reporter_icosq_diagnose(struct mlx5e_icosq *icosq, u8 hw_state, return mlx5e_health_fmsg_named_obj_nest_end(fmsg); } +static int mlx5e_health_rq_put_sw_state(struct devlink_fmsg *fmsg, struct mlx5e_rq *rq) +{ + int err; + int i; + + BUILD_BUG_ON_MSG(ARRAY_SIZE(rq_sw_state_type_name) != MLX5E_NUM_RQ_STATES, + "rq_sw_state_type_name string array must be consistent with MLX5E_RQ_STATE_* enum in en.h"); + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SW State"); + if (err) + return err; + + for (i = 0; i < ARRAY_SIZE(rq_sw_state_type_name); ++i) { + err = devlink_fmsg_u32_pair_put(fmsg, rq_sw_state_type_name[i], + test_bit(i, &rq->state)); + if (err) + return err; + } + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + + return devlink_fmsg_obj_nest_end(fmsg); +} + static int mlx5e_rx_reporter_build_diagnose_output_rq_common(struct mlx5e_rq *rq, struct devlink_fmsg *fmsg) @@ -265,10 +306,6 @@ mlx5e_rx_reporter_build_diagnose_output_rq_common(struct mlx5e_rq *rq, if (err) return err; - err = devlink_fmsg_u8_pair_put(fmsg, "SW state", rq->state); - if (err) - return err; - err = 
devlink_fmsg_u32_pair_put(fmsg, "WQE counter", wqe_counter); if (err) return err; @@ -281,6 +318,10 @@ mlx5e_rx_reporter_build_diagnose_output_rq_common(struct mlx5e_rq *rq, if (err) return err; + err = mlx5e_health_rq_put_sw_state(fmsg, rq); + if (err) + return err; + err = mlx5e_health_cq_diag_fmsg(&rq->cq, fmsg); if (err) return err; -- GitLab From fc9d982a2512b294607b2922de3809e7c92810da Mon Sep 17 00:00:00 2001 From: Adham Faris Date: Mon, 13 Mar 2023 22:42:27 -0700 Subject: [PATCH 0487/2078] net/mlx5e: Expose SQ SW state as part of SQ health diagnostics Add SQ SW state textual representation to devlink health diagnostics for tx reporter. SQ SW state can be retrieved by issuing the devlink command below: $ devlink health diagnose auxiliary/mlx5_core.eth.0/65535 reporter tx Output ======================================================================= Common Config: SQ: stride size: 64 size: 1024 ts_format: FRC CQ: stride size: 64 size: 1024 SQs: channel ix: 0 tc: 0 txq ix: 0 sqn: 4170 HW state: 1 stopped: false cc: 0 pc: 0 SW State: enabled: 1 mpwqe: 1 recovering: 0 ipsec: 0 am: 1 vlan_need_l2_inline: 1 pending_xsk_tx: 0 pending_tls_rx_resync: 0 xdp_multibuf: 0 CQ: cqn: 1031 HW status: 0 ci: 0 size: 1024 EQ: eqn: 7 irqn: 32 vecidx: 0 ci: 2 size: 2048 Signed-off-by: Adham Faris Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20230314054234.267365-9-saeed@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 6 ++- .../mellanox/mlx5/core/en/reporter_tx.c | 46 +++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 6c01da3bad745..67f7e24d1f36d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -389,8 +389,11 @@ struct mlx5e_sq_dma { enum mlx5e_dma_map_type type; }; +/* Keep this enum consistent with with the corresponding strings array + * declared in en/reporter_tx.c + */ enum { - MLX5E_SQ_STATE_ENABLED, + MLX5E_SQ_STATE_ENABLED = 0, MLX5E_SQ_STATE_MPWQE, MLX5E_SQ_STATE_RECOVERING, MLX5E_SQ_STATE_IPSEC, @@ -399,6 +402,7 @@ enum { MLX5E_SQ_STATE_PENDING_XSK_TX, MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, MLX5E_SQ_STATE_XDP_MULTIBUF, + MLX5E_NUM_SQ_STATES, /* Must be kept last */ }; struct mlx5e_tx_mpwqe { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 34666e2b38716..44c1926843a1a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -6,6 +6,19 @@ #include "en/devlink.h" #include "lib/tout.h" +/* Keep this string array consistent with the MLX5E_SQ_STATE_* enums in en.h */ +static const char * const sq_sw_state_type_name[] = { + [MLX5E_SQ_STATE_ENABLED] = "enabled", + [MLX5E_SQ_STATE_MPWQE] = "mpwqe", + [MLX5E_SQ_STATE_RECOVERING] = "recovering", + [MLX5E_SQ_STATE_IPSEC] = "ipsec", + [MLX5E_SQ_STATE_DIM] = "dim", + [MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE] = "vlan_need_l2_inline", + [MLX5E_SQ_STATE_PENDING_XSK_TX] = "pending_xsk_tx", + [MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC] = "pending_tls_rx_resync", + [MLX5E_SQ_STATE_XDP_MULTIBUF] = "xdp_multibuf", +}; + static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) { struct mlx5_core_dev *dev = sq->mdev; @@ -37,6 +50,35 @@ static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq) sq->pc = 0; } +static int 
mlx5e_health_sq_put_sw_state(struct devlink_fmsg *fmsg, struct mlx5e_txqsq *sq) +{ + int err; + int i; + + BUILD_BUG_ON_MSG(ARRAY_SIZE(sq_sw_state_type_name) != MLX5E_NUM_SQ_STATES, + "sq_sw_state_type_name string array must be consistent with MLX5E_SQ_STATE_* enum in en.h"); + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SW State"); + if (err) + return err; + + for (i = 0; i < ARRAY_SIZE(sq_sw_state_type_name); ++i) { + err = devlink_fmsg_u32_pair_put(fmsg, sq_sw_state_type_name[i], + test_bit(i, &sq->state)); + if (err) + return err; + } + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + + return devlink_fmsg_obj_nest_end(fmsg); +} + static int mlx5e_tx_reporter_err_cqe_recover(void *ctx) { struct mlx5_core_dev *mdev; @@ -190,6 +232,10 @@ mlx5e_tx_reporter_build_diagnose_output_sq_common(struct devlink_fmsg *fmsg, if (err) return err; + err = mlx5e_health_sq_put_sw_state(fmsg, sq); + if (err) + return err; + err = mlx5e_health_cq_diag_fmsg(&sq->cq, fmsg); if (err) return err; -- GitLab From bb76d250e55c72975b77f5d610fd3733306b3031 Mon Sep 17 00:00:00 2001 From: Adham Faris Date: Mon, 13 Mar 2023 22:42:28 -0700 Subject: [PATCH 0488/2078] net/mlx5e: Add XSK RQ state flag for RQ devlink health diagnostics Currently RQ health diagnostics doesn't inform the user whether an RQ is an XSK RQ or not. Address this, by adding XSK state flag to RQ SW state enum in core/en.h. XSK will be '1' if current RQ is an XSK RQ, and it will be '0' if it's not. In this example below, it can be seen that XSK field value is '1' since xdpsock program have been attached to channel 0 before issuing the devlink query command: $ devlink health diagnose auxiliary/mlx5_core.eth.0/65535 reporter rx Output: ======================================================================= Common config: RQ: type: 2 stride size: 4096 size: 16 ts_format: FRC CQ: stride size: 64 size: 1024 RQs: channel ix: 0 rqn: 4236 HW state: 1 WQE counter: 15 posted WQEs: 15 cc: 15 SW State: enabled: 1 recovering: 0 am: 1 no_csum_complete: 1 csum_full: 0 mini_cqe_hw_stridx: 1 shampo: 0 mini_cqe_enhanced: 0 xsk: 1 CQ: cqn: 1085 HW status: 0 ci: 0 size: 1024 EQ: eqn: 7 irqn: 32 vecidx: 0 ci: 5 size: 2048 ICOSQ: sqn: 4229 HW state: 1 cc: 158 pc: 158 WQE size: 2048 CQ: cqn: 1080 cc: 1 size: 2048 Signed-off-by: Adham Faris Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20230314054234.267365-10-saeed@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + .../net/ethernet/mellanox/mlx5/core/en/reporter_rx.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c | 10 ++++++++-- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 67f7e24d1f36d..e4db252d406c4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -348,6 +348,7 @@ enum { MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, /* set when mini_cqe_resp_stride_index cap is used */ MLX5E_RQ_STATE_SHAMPO, /* set when SHAMPO cap is used */ MLX5E_RQ_STATE_MINI_CQE_ENHANCED, /* set when enhanced mini_cqe_cap is used */ + MLX5E_RQ_STATE_XSK, /* set to indicate an xsk rq */ MLX5E_NUM_RQ_STATES, /* Must be kept last */ }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index 
98c87b3df806d..b621f735cdc35 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -18,6 +18,7 @@ static const char * const rq_sw_state_type_name[] = { [MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX] = "mini_cqe_hw_stridx", [MLX5E_RQ_STATE_SHAMPO] = "shampo", [MLX5E_RQ_STATE_MINI_CQE_ENHANCED] = "mini_cqe_enhanced", + [MLX5E_RQ_STATE_XSK] = "xsk", }; static int mlx5e_query_rq_state(struct mlx5_core_dev *dev, u32 rqn, u8 *state) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index 81a567e172646..ed279f4509761 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -93,13 +93,19 @@ static int mlx5e_open_xsk_rq(struct mlx5e_channel *c, struct mlx5e_params *param struct mlx5e_rq_param *rq_params, struct xsk_buff_pool *pool, struct mlx5e_xsk_param *xsk) { + struct mlx5e_rq *xskrq = &c->xskrq; int err; - err = mlx5e_init_xsk_rq(c, params, pool, xsk, &c->xskrq); + err = mlx5e_init_xsk_rq(c, params, pool, xsk, xskrq); if (err) return err; - return mlx5e_open_rq(params, rq_params, xsk, cpu_to_node(c->cpu), &c->xskrq); + err = mlx5e_open_rq(params, rq_params, xsk, cpu_to_node(c->cpu), xskrq); + if (err) + return err; + + __set_bit(MLX5E_RQ_STATE_XSK, &xskrq->state); + return 0; } int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, -- GitLab From 028522e2844393abc44f7bd7477eb4a455f01579 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 13 Mar 2023 22:42:29 -0700 Subject: [PATCH 0489/2078] net/mlx5: Move needed PTYS functions to core layer Downstream patches require devlink params to access the PTYS register, move the needed functions from mlx5e to the core layer. 
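As a sketch (illustrative only, not part of this patch), any core-layer code can now query the maximum link speed directly through the moved helpers:

	u32 max_speed;
	int err;

	err = mlx5_port_max_linkspeed(mdev, &max_speed);
	if (!err)
		mlx5_core_dbg(mdev, "max link speed: %u Mbps\n", max_speed);

The devlink parameter code added later in this series relies on exactly this ability.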
Signed-off-by: Gal Pressman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20230314054234.267365-11-saeed@kernel.org Signed-off-by: Jakub Kicinski --- .../ethernet/mellanox/mlx5/core/en/params.c | 2 +- .../net/ethernet/mellanox/mlx5/core/en/port.c | 157 +----------------- .../net/ethernet/mellanox/mlx5/core/en/port.h | 14 -- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 12 +- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +- .../net/ethernet/mellanox/mlx5/core/esw/qos.c | 2 +- .../net/ethernet/mellanox/mlx5/core/port.c | 151 +++++++++++++++++ include/linux/mlx5/port.h | 16 ++ 8 files changed, 179 insertions(+), 177 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index a21bd1179477b..561da78d3b5c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -553,7 +553,7 @@ bool slow_pci_heuristic(struct mlx5_core_dev *mdev) u32 link_speed = 0; u32 pci_bw = 0; - mlx5e_port_max_linkspeed(mdev, &link_speed); + mlx5_port_max_linkspeed(mdev, &link_speed); pci_bw = pcie_bandwidth_available(mdev->pdev, NULL, NULL, NULL); mlx5_core_dbg_once(mdev, "Max link speed = %d, PCI BW = %d\n", link_speed, pci_bw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index 505ba41195b93..dbe2b19a9570e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -32,101 +32,6 @@ #include "port.h" -/* speed in units of 1Mb */ -static const u32 mlx5e_link_speed[MLX5E_LINK_MODES_NUMBER] = { - [MLX5E_1000BASE_CX_SGMII] = 1000, - [MLX5E_1000BASE_KX] = 1000, - [MLX5E_10GBASE_CX4] = 10000, - [MLX5E_10GBASE_KX4] = 10000, - [MLX5E_10GBASE_KR] = 10000, - [MLX5E_20GBASE_KR2] = 20000, - [MLX5E_40GBASE_CR4] = 40000, - [MLX5E_40GBASE_KR4] = 40000, - [MLX5E_56GBASE_R4] = 56000, - [MLX5E_10GBASE_CR] = 10000, - [MLX5E_10GBASE_SR] = 10000, - [MLX5E_10GBASE_ER] = 10000, - [MLX5E_40GBASE_SR4] = 40000, - [MLX5E_40GBASE_LR4] = 40000, - [MLX5E_50GBASE_SR2] = 50000, - [MLX5E_100GBASE_CR4] = 100000, - [MLX5E_100GBASE_SR4] = 100000, - [MLX5E_100GBASE_KR4] = 100000, - [MLX5E_100GBASE_LR4] = 100000, - [MLX5E_100BASE_TX] = 100, - [MLX5E_1000BASE_T] = 1000, - [MLX5E_10GBASE_T] = 10000, - [MLX5E_25GBASE_CR] = 25000, - [MLX5E_25GBASE_KR] = 25000, - [MLX5E_25GBASE_SR] = 25000, - [MLX5E_50GBASE_CR2] = 50000, - [MLX5E_50GBASE_KR2] = 50000, -}; - -static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = { - [MLX5E_SGMII_100M] = 100, - [MLX5E_1000BASE_X_SGMII] = 1000, - [MLX5E_5GBASE_R] = 5000, - [MLX5E_10GBASE_XFI_XAUI_1] = 10000, - [MLX5E_40GBASE_XLAUI_4_XLPPI_4] = 40000, - [MLX5E_25GAUI_1_25GBASE_CR_KR] = 25000, - [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2] = 50000, - [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR] = 50000, - [MLX5E_CAUI_4_100GBASE_CR4_KR4] = 100000, - [MLX5E_100GAUI_2_100GBASE_CR2_KR2] = 100000, - [MLX5E_200GAUI_4_200GBASE_CR4_KR4] = 200000, - [MLX5E_400GAUI_8] = 400000, - [MLX5E_100GAUI_1_100GBASE_CR_KR] = 100000, - [MLX5E_200GAUI_2_200GBASE_CR2_KR2] = 200000, - [MLX5E_400GAUI_4_400GBASE_CR4_KR4] = 400000, -}; - -bool mlx5e_ptys_ext_supported(struct mlx5_core_dev *mdev) -{ - struct mlx5e_port_eth_proto eproto; - int err; - - if (MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet)) - return true; - - err = mlx5_port_query_eth_proto(mdev, 1, true, &eproto); - if (err) - return false; - - return !!eproto.cap; -} - 
-static void mlx5e_port_get_speed_arr(struct mlx5_core_dev *mdev, - const u32 **arr, u32 *size, - bool force_legacy) -{ - bool ext = force_legacy ? false : mlx5e_ptys_ext_supported(mdev); - - *size = ext ? ARRAY_SIZE(mlx5e_ext_link_speed) : - ARRAY_SIZE(mlx5e_link_speed); - *arr = ext ? mlx5e_ext_link_speed : mlx5e_link_speed; -} - -int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, - struct mlx5e_port_eth_proto *eproto) -{ - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; - int err; - - if (!eproto) - return -EINVAL; - - err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, port); - if (err) - return err; - - eproto->cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, - eth_proto_capability); - eproto->admin = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_admin); - eproto->oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper); - return 0; -} - void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status, u8 *an_disable_cap, u8 *an_disable_admin) { @@ -172,30 +77,14 @@ int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable, sizeof(out), MLX5_REG_PTYS, 0, 1); } -u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper, - bool force_legacy) -{ - unsigned long temp = eth_proto_oper; - const u32 *table; - u32 speed = 0; - u32 max_size; - int i; - - mlx5e_port_get_speed_arr(mdev, &table, &max_size, force_legacy); - i = find_first_bit(&temp, max_size); - if (i < max_size) - speed = table[i]; - return speed; -} - int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) { - struct mlx5e_port_eth_proto eproto; + struct mlx5_port_eth_proto eproto; bool force_legacy = false; bool ext; int err; - ext = mlx5e_ptys_ext_supported(mdev); + ext = mlx5_ptys_ext_supported(mdev); err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto); if (err) goto out; @@ -205,7 +94,7 @@ int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) if (err) goto out; } - *speed = mlx5e_port_ptys2speed(mdev, eproto.oper, force_legacy); + *speed = mlx5_port_ptys2speed(mdev, eproto.oper, force_legacy); if (!(*speed)) err = -EINVAL; @@ -213,46 +102,6 @@ out: return err; } -int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) -{ - struct mlx5e_port_eth_proto eproto; - u32 max_speed = 0; - const u32 *table; - u32 max_size; - bool ext; - int err; - int i; - - ext = mlx5e_ptys_ext_supported(mdev); - err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto); - if (err) - return err; - - mlx5e_port_get_speed_arr(mdev, &table, &max_size, false); - for (i = 0; i < max_size; ++i) - if (eproto.cap & MLX5E_PROT_MASK(i)) - max_speed = max(max_speed, table[i]); - - *speed = max_speed; - return 0; -} - -u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed, - bool force_legacy) -{ - u32 link_modes = 0; - const u32 *table; - u32 max_size; - int i; - - mlx5e_port_get_speed_arr(mdev, &table, &max_size, force_legacy); - for (i = 0; i < max_size; ++i) { - if (table[i] == speed) - link_modes |= MLX5E_PROT_MASK(i); - } - return link_modes; -} - int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out) { int sz = MLX5_ST_SZ_BYTES(pbmc_reg); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h index 3f474e370828d..d1da225f35dad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h @@ -36,25 +36,11 @@ #include #include "en.h" -struct mlx5e_port_eth_proto { - u32 cap; - u32 admin; - u32 oper; -}; - -int 
mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, - struct mlx5e_port_eth_proto *eproto); void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status, u8 *an_disable_cap, u8 *an_disable_admin); int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable, u32 proto_admin, bool ext); -u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper, - bool force_legacy); int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); -int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); -u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed, - bool force_legacy); -bool mlx5e_ptys_ext_supported(struct mlx5_core_dev *mdev); int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out); int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in); int mlx5e_port_query_sbpr(struct mlx5_core_dev *mdev, u32 desc, u8 dir, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 7708acc9b2ab3..53c35147f29b7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -220,7 +220,7 @@ static void mlx5e_ethtool_get_speed_arr(struct mlx5_core_dev *mdev, struct ptys2ethtool_config **arr, u32 *size) { - bool ext = mlx5e_ptys_ext_supported(mdev); + bool ext = mlx5_ptys_ext_supported(mdev); *arr = ext ? ptys2ext_ethtool_table : ptys2legacy_ethtool_table; *size = ext ? ARRAY_SIZE(ptys2ext_ethtool_table) : @@ -895,7 +895,7 @@ static void get_speed_duplex(struct net_device *netdev, if (!netif_carrier_ok(netdev)) goto out; - speed = mlx5e_port_ptys2speed(priv->mdev, eth_proto_oper, force_legacy); + speed = mlx5_port_ptys2speed(priv->mdev, eth_proto_oper, force_legacy); if (!speed) { if (data_rate_oper) speed = 100 * data_rate_oper; @@ -980,7 +980,7 @@ static void get_lp_advertising(struct mlx5_core_dev *mdev, u32 eth_proto_lp, struct ethtool_link_ksettings *link_ksettings) { unsigned long *lp_advertising = link_ksettings->link_modes.lp_advertising; - bool ext = mlx5e_ptys_ext_supported(mdev); + bool ext = mlx5_ptys_ext_supported(mdev); ptys2ethtool_adver_link(lp_advertising, eth_proto_lp, ext); } @@ -1160,7 +1160,7 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, const struct ethtool_link_ksettings *link_ksettings) { struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_port_eth_proto eproto; + struct mlx5_port_eth_proto eproto; const unsigned long *adver; bool an_changes = false; u8 an_disable_admin; @@ -1180,7 +1180,7 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, autoneg = link_ksettings->base.autoneg; speed = link_ksettings->base.speed; - ext_supported = mlx5e_ptys_ext_supported(mdev); + ext_supported = mlx5_ptys_ext_supported(mdev); ext = ext_requested(autoneg, adver, ext_supported); if (!ext_supported && ext) return -EOPNOTSUPP; @@ -1194,7 +1194,7 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, goto out; } link_modes = autoneg == AUTONEG_ENABLE ? 
ethtool2ptys_adver_func(adver) : - mlx5e_port_speed2linkmodes(mdev, speed, !ext); + mlx5_port_speed2linkmodes(mdev, speed, !ext); err = mlx5e_speed_validate(priv->netdev, ext, link_modes, autoneg); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 70b8d2dfa751f..79dd8ad5ede7f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1108,7 +1108,7 @@ mlx5e_hairpin_params_init(struct mlx5e_hairpin_params *hairpin_params, hairpin_params->mdev = mdev; /* set hairpin pair per each 50Gbs share of the link */ - mlx5e_port_max_linkspeed(mdev, &link_speed); + mlx5_port_max_linkspeed(mdev, &link_speed); link_speed = max_t(u32, link_speed, 50000); link_speed64 = link_speed; do_div(link_speed64, 50000); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 75015d370922e..7c79476cc5f95 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -744,7 +744,7 @@ static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char * u64 value; int err; - err = mlx5e_port_max_linkspeed(mdev, &link_speed_max); + err = mlx5_port_max_linkspeed(mdev, &link_speed_max); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed"); return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index a1548e6bfb35d..0daeb4b72cca7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -1054,3 +1054,154 @@ out: kfree(out); return err; } + +/* speed in units of 1Mb */ +static const u32 mlx5e_link_speed[MLX5E_LINK_MODES_NUMBER] = { + [MLX5E_1000BASE_CX_SGMII] = 1000, + [MLX5E_1000BASE_KX] = 1000, + [MLX5E_10GBASE_CX4] = 10000, + [MLX5E_10GBASE_KX4] = 10000, + [MLX5E_10GBASE_KR] = 10000, + [MLX5E_20GBASE_KR2] = 20000, + [MLX5E_40GBASE_CR4] = 40000, + [MLX5E_40GBASE_KR4] = 40000, + [MLX5E_56GBASE_R4] = 56000, + [MLX5E_10GBASE_CR] = 10000, + [MLX5E_10GBASE_SR] = 10000, + [MLX5E_10GBASE_ER] = 10000, + [MLX5E_40GBASE_SR4] = 40000, + [MLX5E_40GBASE_LR4] = 40000, + [MLX5E_50GBASE_SR2] = 50000, + [MLX5E_100GBASE_CR4] = 100000, + [MLX5E_100GBASE_SR4] = 100000, + [MLX5E_100GBASE_KR4] = 100000, + [MLX5E_100GBASE_LR4] = 100000, + [MLX5E_100BASE_TX] = 100, + [MLX5E_1000BASE_T] = 1000, + [MLX5E_10GBASE_T] = 10000, + [MLX5E_25GBASE_CR] = 25000, + [MLX5E_25GBASE_KR] = 25000, + [MLX5E_25GBASE_SR] = 25000, + [MLX5E_50GBASE_CR2] = 50000, + [MLX5E_50GBASE_KR2] = 50000, +}; + +static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = { + [MLX5E_SGMII_100M] = 100, + [MLX5E_1000BASE_X_SGMII] = 1000, + [MLX5E_5GBASE_R] = 5000, + [MLX5E_10GBASE_XFI_XAUI_1] = 10000, + [MLX5E_40GBASE_XLAUI_4_XLPPI_4] = 40000, + [MLX5E_25GAUI_1_25GBASE_CR_KR] = 25000, + [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2] = 50000, + [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR] = 50000, + [MLX5E_CAUI_4_100GBASE_CR4_KR4] = 100000, + [MLX5E_100GAUI_2_100GBASE_CR2_KR2] = 100000, + [MLX5E_200GAUI_4_200GBASE_CR4_KR4] = 200000, + [MLX5E_400GAUI_8] = 400000, + [MLX5E_100GAUI_1_100GBASE_CR_KR] = 100000, + [MLX5E_200GAUI_2_200GBASE_CR2_KR2] = 200000, + [MLX5E_400GAUI_4_400GBASE_CR4_KR4] = 400000, +}; + +int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, + struct mlx5_port_eth_proto *eproto) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + int err; + + if 
(!eproto) + return -EINVAL; + + err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, port); + if (err) + return err; + + eproto->cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, + eth_proto_capability); + eproto->admin = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_admin); + eproto->oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper); + return 0; +} + +bool mlx5_ptys_ext_supported(struct mlx5_core_dev *mdev) +{ + struct mlx5_port_eth_proto eproto; + int err; + + if (MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet)) + return true; + + err = mlx5_port_query_eth_proto(mdev, 1, true, &eproto); + if (err) + return false; + + return !!eproto.cap; +} + +static void mlx5e_port_get_speed_arr(struct mlx5_core_dev *mdev, + const u32 **arr, u32 *size, + bool force_legacy) +{ + bool ext = force_legacy ? false : mlx5_ptys_ext_supported(mdev); + + *size = ext ? ARRAY_SIZE(mlx5e_ext_link_speed) : + ARRAY_SIZE(mlx5e_link_speed); + *arr = ext ? mlx5e_ext_link_speed : mlx5e_link_speed; +} + +u32 mlx5_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper, + bool force_legacy) +{ + unsigned long temp = eth_proto_oper; + const u32 *table; + u32 speed = 0; + u32 max_size; + int i; + + mlx5e_port_get_speed_arr(mdev, &table, &max_size, force_legacy); + i = find_first_bit(&temp, max_size); + if (i < max_size) + speed = table[i]; + return speed; +} + +u32 mlx5_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed, + bool force_legacy) +{ + u32 link_modes = 0; + const u32 *table; + u32 max_size; + int i; + + mlx5e_port_get_speed_arr(mdev, &table, &max_size, force_legacy); + for (i = 0; i < max_size; ++i) { + if (table[i] == speed) + link_modes |= MLX5E_PROT_MASK(i); + } + return link_modes; +} + +int mlx5_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) +{ + struct mlx5_port_eth_proto eproto; + u32 max_speed = 0; + const u32 *table; + u32 max_size; + bool ext; + int err; + int i; + + ext = mlx5_ptys_ext_supported(mdev); + err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto); + if (err) + return err; + + mlx5e_port_get_speed_arr(mdev, &table, &max_size, false); + for (i = 0; i < max_size; ++i) + if (eproto.cap & MLX5E_PROT_MASK(i)) + max_speed = max(max_speed, table[i]); + + *speed = max_speed; + return 0; +} diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index e96ee1e348cbc..98b2e1e149f93 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -141,6 +141,12 @@ enum mlx5_ptys_width { MLX5_PTYS_WIDTH_12X = 1 << 4, }; +struct mlx5_port_eth_proto { + u32 cap; + u32 admin; + u32 oper; +}; + #define MLX5E_PROT_MASK(link_mode) (1U << link_mode) #define MLX5_GET_ETH_PROTO(reg, out, ext, field) \ (ext ? 
MLX5_GET(reg, out, ext_##field) : \ @@ -218,4 +224,14 @@ int mlx5_set_trust_state(struct mlx5_core_dev *mdev, u8 trust_state); int mlx5_query_trust_state(struct mlx5_core_dev *mdev, u8 *trust_state); int mlx5_set_dscp2prio(struct mlx5_core_dev *mdev, u8 dscp, u8 prio); int mlx5_query_dscp2prio(struct mlx5_core_dev *mdev, u8 *dscp2prio); + +int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, + struct mlx5_port_eth_proto *eproto); +bool mlx5_ptys_ext_supported(struct mlx5_core_dev *mdev); +u32 mlx5_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper, + bool force_legacy); +u32 mlx5_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed, + bool force_legacy); +int mlx5_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); + #endif /* __MLX5_PORT_H__ */ -- GitLab From 1bffcea42926b26e092045ac398850e80d950bb2 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 13 Mar 2023 22:42:30 -0700 Subject: [PATCH 0490/2078] net/mlx5e: Add devlink hairpin queues parameters We refer to a TC NIC rule that involves forwarding as "hairpin". Hairpin queues are mlx5 hardware specific implementation for hardware forwarding of such packets. Per the discussion in [1], move the hairpin queues control (number and size) from debugfs to devlink. Expose two devlink params: - hairpin_num_queues: control the number of hairpin queues - hairpin_queue_size: control the size (in packets) of the hairpin queues [1] https://lore.kernel.org/all/20230111194608.7f15b9a1@kernel.org/ Signed-off-by: Gal Pressman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20230314054234.267365-12-saeed@kernel.org Signed-off-by: Jakub Kicinski --- .../ethernet/mellanox/mlx5/devlink.rst | 35 ++++++++++ Documentation/networking/devlink/mlx5.rst | 12 ++++ .../net/ethernet/mellanox/mlx5/core/devlink.c | 66 +++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/devlink.h | 2 + .../net/ethernet/mellanox/mlx5/core/en_tc.c | 50 ++++++-------- 5 files changed, 134 insertions(+), 31 deletions(-) diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst index 9b5c40ba7f0da..0995e4e5acd78 100644 --- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst +++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst @@ -122,6 +122,41 @@ users try to enable them. $ devlink dev eswitch set pci/0000:06:00.0 mode switchdev +hairpin_num_queues: Number of hairpin queues +-------------------------------------------- +We refer to a TC NIC rule that involves forwarding as "hairpin". + +Hairpin queues are mlx5 hardware specific implementation for hardware +forwarding of such packets. + +- Show the number of hairpin queues:: + + $ devlink dev param show pci/0000:06:00.0 name hairpin_num_queues + pci/0000:06:00.0: + name hairpin_num_queues type driver-specific + values: + cmode driverinit value 2 + +- Change the number of hairpin queues:: + + $ devlink dev param set pci/0000:06:00.0 name hairpin_num_queues value 4 cmode driverinit + +hairpin_queue_size: Size of the hairpin queues +---------------------------------------------- +Control the size of the hairpin queues. 
+ +- Show the size of the hairpin queues:: + + $ devlink dev param show pci/0000:06:00.0 name hairpin_queue_size + pci/0000:06:00.0: + name hairpin_queue_size type driver-specific + values: + cmode driverinit value 1024 + +- Change the size (in packets) of the hairpin queues:: + + $ devlink dev param set pci/0000:06:00.0 name hairpin_queue_size value 512 cmode driverinit + Health reporters ================ diff --git a/Documentation/networking/devlink/mlx5.rst b/Documentation/networking/devlink/mlx5.rst index 3321117cf6057..202798d6501e7 100644 --- a/Documentation/networking/devlink/mlx5.rst +++ b/Documentation/networking/devlink/mlx5.rst @@ -72,6 +72,18 @@ parameters. Default: disabled + * - ``hairpin_num_queues`` + - u32 + - driverinit + - We refer to a TC NIC rule that involves forwarding as "hairpin". + Hairpin queues are mlx5 hardware specific implementation for hardware + forwarding of such packets. + + Control the number of hairpin queues. + * - ``hairpin_queue_size`` + - u32 + - driverinit + - Control the size (in packets) of the hairpin queues. The ``mlx5`` driver supports reloading via ``DEVLINK_CMD_RELOAD`` diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index b7784e02c2ddd..1ee2a472e1d21 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -494,6 +494,61 @@ static int mlx5_devlink_eq_depth_validate(struct devlink *devlink, u32 id, return (val.vu32 >= 64 && val.vu32 <= 4096) ? 0 : -EINVAL; } +static int +mlx5_devlink_hairpin_num_queues_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + return val.vu32 ? 0 : -EINVAL; +} + +static int +mlx5_devlink_hairpin_queue_size_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u32 val32 = val.vu32; + + if (!is_power_of_2(val32)) { + NL_SET_ERR_MSG_MOD(extack, "Value is not power of two"); + return -EINVAL; + } + + if (val32 > BIT(MLX5_CAP_GEN(dev, log_max_hairpin_num_packets))) { + NL_SET_ERR_MSG_FMT_MOD( + extack, "Maximum hairpin queue size is %lu", + BIT(MLX5_CAP_GEN(dev, log_max_hairpin_num_packets))); + return -EINVAL; + } + + return 0; +} + +static void mlx5_devlink_hairpin_params_init_values(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + union devlink_param_value value; + u64 link_speed64; + u32 link_speed; + + /* set hairpin pair per each 50Gbs share of the link */ + mlx5_port_max_linkspeed(dev, &link_speed); + link_speed = max_t(u32, link_speed, 50000); + link_speed64 = link_speed; + do_div(link_speed64, 50000); + + value.vu32 = link_speed64; + devl_param_driverinit_value_set( + devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES, value); + + value.vu32 = + BIT(min_t(u32, 16 - MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(dev), + MLX5_CAP_GEN(dev, log_max_hairpin_num_packets))); + devl_param_driverinit_value_set( + devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE, value); +} + static const struct devlink_param mlx5_devlink_params[] = { DEVLINK_PARAM_GENERIC(ENABLE_ROCE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, mlx5_devlink_enable_roce_validate), @@ -547,6 +602,14 @@ static void mlx5_devlink_set_params_init_values(struct devlink *devlink) static const struct devlink_param mlx5_devlink_eth_params[] = { DEVLINK_PARAM_GENERIC(ENABLE_ETH, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, NULL), + 
DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES, + "hairpin_num_queues", DEVLINK_PARAM_TYPE_U32, + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, + mlx5_devlink_hairpin_num_queues_validate), + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE, + "hairpin_queue_size", DEVLINK_PARAM_TYPE_U32, + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, + mlx5_devlink_hairpin_queue_size_validate), }; static int mlx5_devlink_eth_params_register(struct devlink *devlink) @@ -567,6 +630,9 @@ static int mlx5_devlink_eth_params_register(struct devlink *devlink) devl_param_driverinit_value_set(devlink, DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH, value); + + mlx5_devlink_hairpin_params_init_values(devlink); + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h index 212b12424146a..5dcfb4d86d8a6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h @@ -12,6 +12,8 @@ enum mlx5_devlink_param_id { MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM, MLX5_DEVLINK_PARAM_ID_ESW_PORT_METADATA, MLX5_DEVLINK_PARAM_ID_ESW_MULTIPORT, + MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES, + MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE, }; struct mlx5_trap_ctx { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 79dd8ad5ede7f..2e6351ef4d9c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -44,6 +44,7 @@ #include #include #include +#include "devlink.h" #include "en.h" #include "en/tc/post_act.h" #include "en/tc/act_stats.h" @@ -73,12 +74,6 @@ #define MLX5E_TC_TABLE_NUM_GROUPS 4 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18) -struct mlx5e_hairpin_params { - struct mlx5_core_dev *mdev; - u32 num_queues; - u32 queue_size; -}; - struct mlx5e_tc_table { /* Protects the dynamic assignment of the t parameter * which is the nic tc root table. 
@@ -101,7 +96,6 @@ struct mlx5e_tc_table { struct mlx5_tc_ct_priv *ct; struct mapping_ctx *mapping; - struct mlx5e_hairpin_params hairpin_params; struct dentry *dfs_root; /* tc action stats */ @@ -1099,33 +1093,15 @@ static void mlx5e_tc_debugfs_init(struct mlx5e_tc_table *tc, &debugfs_hairpin_table_dump_fops); } -static void -mlx5e_hairpin_params_init(struct mlx5e_hairpin_params *hairpin_params, - struct mlx5_core_dev *mdev) -{ - u64 link_speed64; - u32 link_speed; - - hairpin_params->mdev = mdev; - /* set hairpin pair per each 50Gbs share of the link */ - mlx5_port_max_linkspeed(mdev, &link_speed); - link_speed = max_t(u32, link_speed, 50000); - link_speed64 = link_speed; - do_div(link_speed64, 50000); - hairpin_params->num_queues = link_speed64; - - hairpin_params->queue_size = - BIT(min_t(u32, 16 - MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), - MLX5_CAP_GEN(mdev, log_max_hairpin_num_packets))); -} - static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5e_tc_flow_parse_attr *parse_attr, struct netlink_ext_ack *extack) { struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + struct devlink *devlink = priv_to_devlink(priv->mdev); int peer_ifindex = parse_attr->mirred_ifindex[0]; + union devlink_param_value val = {}; struct mlx5_hairpin_params params; struct mlx5_core_dev *peer_mdev; struct mlx5e_hairpin_entry *hpe; @@ -1182,7 +1158,14 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, hash_hairpin_info(peer_id, match_prio)); mutex_unlock(&tc->hairpin_tbl_lock); - params.log_num_packets = ilog2(tc->hairpin_params.queue_size); + err = devl_param_driverinit_value_get( + devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE, &val); + if (err) { + err = -ENOMEM; + goto out_err; + } + + params.log_num_packets = ilog2(val.vu32); params.log_data_size = clamp_t(u32, params.log_num_packets + @@ -1191,7 +1174,14 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz)); params.q_counter = priv->q_counter; - params.num_channels = tc->hairpin_params.num_queues; + err = devl_param_driverinit_value_get( + devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES, &val); + if (err) { + err = -ENOMEM; + goto out_err; + } + + params.num_channels = val.vu32; hp = mlx5e_hairpin_create(priv, ¶ms, peer_ifindex); hpe->hp = hp; @@ -5289,8 +5279,6 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) tc->ct = mlx5_tc_ct_init(priv, tc->chains, &tc->mod_hdr, MLX5_FLOW_NAMESPACE_KERNEL, tc->post_act); - mlx5e_hairpin_params_init(&tc->hairpin_params, dev); - tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event; err = register_netdevice_notifier_dev_net(priv->netdev, &tc->netdevice_nb, -- GitLab From 8a0594c0961012ac07112f41800da29dd5632c03 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 13 Mar 2023 22:42:31 -0700 Subject: [PATCH 0491/2078] net/mlx5e: Add more information to hairpin table dump Print the number of hairpin queues and size as part of the hairpin table dump. 
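With this change, a dump line might look as follows (values are hypothetical; the format string is in the hunk below):

	Hairpin peer_vhca_id 2 prio 0 refcnt 1 num_channels 4 num_packets 1024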
Signed-off-by: Gal Pressman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20230314054234.267365-13-saeed@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 2e6351ef4d9c6..a139b5e88e2a5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -583,6 +583,7 @@ struct mlx5e_hairpin { struct mlx5e_tir direct_tir; int num_channels; + u8 log_num_packets; struct mlx5e_rqt indir_rqt; struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5_ttc_table *ttc; @@ -929,6 +930,7 @@ mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params hp->func_mdev = func_mdev; hp->func_priv = priv; hp->num_channels = params->num_channels; + hp->log_num_packets = params->log_num_packets; err = mlx5e_hairpin_create_transport(hp); if (err) @@ -1070,9 +1072,11 @@ static int debugfs_hairpin_table_dump_show(struct seq_file *file, void *priv) mutex_lock(&tc->hairpin_tbl_lock); hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist) - seq_printf(file, "Hairpin peer_vhca_id %u prio %u refcnt %u\n", + seq_printf(file, + "Hairpin peer_vhca_id %u prio %u refcnt %u num_channels %u num_packets %lu\n", hpe->peer_vhca_id, hpe->prio, - refcount_read(&hpe->refcnt)); + refcount_read(&hpe->refcnt), hpe->hp->num_channels, + BIT(hpe->hp->log_num_packets)); mutex_unlock(&tc->hairpin_tbl_lock); return 0; -- GitLab From 244fd698207f0baec062da4737e31c76b1be2371 Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Mon, 13 Mar 2023 22:42:32 -0700 Subject: [PATCH 0492/2078] net/mlx5e: TC, Extract indr setup block checks to function In preparation for next patch which will add new check if device block can be setup, extract all existing checks to function to make it more readable and maintainable. 
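As a minimal illustration of the pattern, a standalone sketch with invented fields (not the mlx5 code; the real conditions cover tunnel devices, VLAN and macvlan devices on the uplink and OVS internal ports, as the diff that follows shows) moves the supported-device checks into one boolean helper so the setup path keeps a single early return:

#include <stdbool.h>
#include <stdio.h>

#define EOPNOTSUPP 95

struct block_req {
	bool ingress;		/* binder type is an ingress qdisc */
	bool tunnel_dev;	/* device is a supported tunnel device */
	bool vlan_on_uplink;	/* VLAN device stacked on the uplink */
};

/* every supported combination returns true; anything else is rejected */
static bool block_supported(const struct block_req *req)
{
	if (!req->ingress)
		return false;
	if (req->tunnel_dev)
		return true;
	if (req->vlan_on_uplink)
		return true;
	return false;
}

static int setup_block(const struct block_req *req)
{
	if (!block_supported(req))
		return -EOPNOTSUPP;
	/* real setup work would happen here */
	return 0;
}

int main(void)
{
	struct block_req ok = { .ingress = true, .tunnel_dev = true };
	struct block_req bad = { .ingress = false };

	printf("tunnel block: %d, egress-only block: %d\n",
	       setup_block(&ok), setup_block(&bad));
	return 0;
}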
Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20230314054234.267365-14-saeed@kernel.org Signed-off-by: Jakub Kicinski --- .../ethernet/mellanox/mlx5/core/en/rep/tc.c | 58 ++++++++++++------- 1 file changed, 36 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index 8f7452dc00ee3..b4af006dc4940 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -426,39 +426,53 @@ static bool mlx5e_rep_macvlan_mode_supported(const struct net_device *dev) return macvlan->mode == MACVLAN_MODE_PASSTHRU; } -static int -mlx5e_rep_indr_setup_block(struct net_device *netdev, struct Qdisc *sch, - struct mlx5e_rep_priv *rpriv, - struct flow_block_offload *f, - flow_setup_cb_t *setup_cb, - void *data, - void (*cleanup)(struct flow_block_cb *block_cb)) +static bool +mlx5e_rep_check_indr_block_supported(struct mlx5e_rep_priv *rpriv, + struct net_device *netdev, + struct flow_block_offload *f) { struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - bool is_ovs_int_port = netif_is_ovs_master(netdev); - struct mlx5e_rep_indr_block_priv *indr_priv; - struct flow_block_cb *block_cb; - if (!mlx5e_tc_tun_device_to_offload(priv, netdev) && - !(is_vlan_dev(netdev) && vlan_dev_real_dev(netdev) == rpriv->netdev) && - !is_ovs_int_port) { - if (!(netif_is_macvlan(netdev) && macvlan_dev_real_dev(netdev) == rpriv->netdev)) - return -EOPNOTSUPP; + if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS && + f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) + return false; + + if (mlx5e_tc_tun_device_to_offload(priv, netdev)) + return true; + + if (is_vlan_dev(netdev) && vlan_dev_real_dev(netdev) == rpriv->netdev) + return true; + + if (netif_is_macvlan(netdev)) { if (!mlx5e_rep_macvlan_mode_supported(netdev)) { netdev_warn(netdev, "Offloading ingress filter is supported only with macvlan passthru mode"); - return -EOPNOTSUPP; + return false; } + + if (macvlan_dev_real_dev(netdev) == rpriv->netdev) + return true; } - if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS && - f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) - return -EOPNOTSUPP; + if (netif_is_ovs_master(netdev) && f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS && + mlx5e_tc_int_port_supported(esw)) + return true; - if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS && !is_ovs_int_port) - return -EOPNOTSUPP; + return false; +} + +static int +mlx5e_rep_indr_setup_block(struct net_device *netdev, struct Qdisc *sch, + struct mlx5e_rep_priv *rpriv, + struct flow_block_offload *f, + flow_setup_cb_t *setup_cb, + void *data, + void (*cleanup)(struct flow_block_cb *block_cb)) +{ + struct mlx5e_rep_indr_block_priv *indr_priv; + struct flow_block_cb *block_cb; - if (is_ovs_int_port && !mlx5e_tc_int_port_supported(esw)) + if (!mlx5e_rep_check_indr_block_supported(rpriv, netdev, f)) return -EOPNOTSUPP; f->unlocked_driver_cb = true; -- GitLab From d5d006bb27ad9d408074eef7e091fbd87335f913 Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Mon, 13 Mar 2023 22:42:33 -0700 Subject: [PATCH 0493/2078] net/mlx5e: Enable TC offload for ingress MACVLAN over bond Support offloading of TC rules that filter ingress traffic from a MACVLAN device, which is attached to bond device. 
Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20230314054234.267365-15-saeed@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index b4af006dc4940..19c4a83982ca5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -433,6 +433,7 @@ mlx5e_rep_check_indr_block_supported(struct mlx5e_rep_priv *rpriv, { struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct net_device *macvlan_real_dev; if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS && f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) @@ -450,7 +451,11 @@ mlx5e_rep_check_indr_block_supported(struct mlx5e_rep_priv *rpriv, return false; } - if (macvlan_dev_real_dev(netdev) == rpriv->netdev) + macvlan_real_dev = macvlan_dev_real_dev(netdev); + + if (macvlan_real_dev == rpriv->netdev) + return true; + if (netif_is_bond_master(macvlan_real_dev)) + return true; } -- GitLab From 63b02048f9a781e01e59743cd2d3e0a1f10c971a Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Mon, 13 Mar 2023 22:42:34 -0700 Subject: [PATCH 0494/2078] net/mlx5e: Enable TC offload for egress MACVLAN over bond Support offloading of TC rules that mirror/redirect egress traffic to a MACVLAN device, which is attached to a bond device whose slaves are mlx5 devices. Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20230314054234.267365-16-saeed@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c index 07cc65596f891..291193f7120d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c @@ -234,6 +234,9 @@ parse_mirred(struct mlx5e_tc_act_parse_state *parse_state, if (mlx5_lag_mpesw_do_mirred(priv->mdev, out_dev, extack)) return -EOPNOTSUPP; + if (netif_is_macvlan(out_dev)) + out_dev = macvlan_dev_real_dev(out_dev); + out_dev = get_fdb_out_dev(uplink_dev, out_dev); if (!out_dev) return -ENODEV; @@ -250,9 +253,6 @@ parse_mirred(struct mlx5e_tc_act_parse_state *parse_state, return err; } - if (netif_is_macvlan(out_dev)) - out_dev = macvlan_dev_real_dev(out_dev); - err = verify_uplink_forwarding(priv, attr, out_dev, extack); if (err) return err; -- GitLab From 8a8db7aeaa6dae4ddae033858bf5b49aecfbbe42 Mon Sep 17 00:00:00 2001 From: Wentao Jia Date: Tue, 14 Mar 2023 08:36:05 +0200 Subject: [PATCH 0495/2078] nfp: flower: add get_flow_act_ct() for ct action The CT action is a special case that differs from other actions: a CT clear action is not wanted when getting the ct action, but this case was not considered.
If a CT clear action is present in the flow rule, skip it when getting the ct action and return the first ct action that is not a CT clear action. Signed-off-by: Wentao Jia Acked-by: Simon Horman Signed-off-by: Louis Peens Signed-off-by: Jakub Kicinski --- .../ethernet/netronome/nfp/flower/conntrack.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c index d23830b5bcb86..a54d374788e16 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c @@ -1656,6 +1656,22 @@ void nfp_fl_ct_clean_flow_entry(struct nfp_fl_ct_flow_entry *entry) kfree(entry); } +static struct flow_action_entry *get_flow_act_ct(struct flow_rule *rule) +{ + struct flow_action_entry *act; + int i; + + /* More than one ct action may be present in a flow rule, + * Return the first one that is not a CT clear action + */ + flow_action_for_each(i, act, &rule->action) { + if (act->id == FLOW_ACTION_CT && act->ct.action != TCA_CT_ACT_CLEAR) + return act; + } + + return NULL; +} + static struct flow_action_entry *get_flow_act(struct flow_rule *rule, enum flow_action_id act_id) { @@ -1720,7 +1736,7 @@ int nfp_fl_ct_handle_pre_ct(struct nfp_flower_priv *priv, struct nfp_fl_ct_zone_entry *zt; int err; - ct_act = get_flow_act(flow->rule, FLOW_ACTION_CT); + ct_act = get_flow_act_ct(flow->rule); if (!ct_act) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: Conntrack action empty in conntrack offload"); -- GitLab From cee7b339d806f71df611087724ddfbaad2d8f8b7 Mon Sep 17 00:00:00 2001 From: Wentao Jia Date: Tue, 14 Mar 2023 08:36:06 +0200 Subject: [PATCH 0496/2078] nfp: flower: refactor function "is_pre_ct_flow" In the scenario of multiple ct zones, a ct state key match and a ct action may be present in one flow rule; by design, such a flow rule is classified as a post_ct_flow. A pre ct flow has no ct state key match, so this judging condition is added to the function "is_pre_ct_flow". chain_index is another field used to judge which flows are pre ct flows: if chain_index is not 0, the flow is not a pre ct flow.
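A standalone sketch of that classification rule (illustrative only; the struct and field names below are invented and do not match the flow_cls_offload/flow_rule structures the driver actually inspects):

#include <stdbool.h>
#include <stdio.h>

struct toy_flow {
	bool has_ct_action;	/* rule contains a conntrack action */
	unsigned int ct_state;	/* matched ct_state bits, 0 when not matched */
	unsigned int chain_index;
};

/* a pre ct flow has a ct action, matches no ct state and sits on chain 0 */
static bool toy_is_pre_ct_flow(const struct toy_flow *f)
{
	if (f->ct_state)
		return false;
	if (f->chain_index)
		return false;
	return f->has_ct_action;
}

int main(void)
{
	struct toy_flow pre = { .has_ct_action = true };
	struct toy_flow post = { .has_ct_action = true, .ct_state = 0x2, .chain_index = 1 };

	printf("pre rule: %d, post rule: %d\n",
	       toy_is_pre_ct_flow(&pre), toy_is_pre_ct_flow(&post));
	return 0;
}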
Signed-off-by: Wentao Jia Acked-by: Simon Horman Signed-off-by: Louis Peens Signed-off-by: Jakub Kicinski --- .../net/ethernet/netronome/nfp/flower/conntrack.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c index a54d374788e16..e0d6c87542729 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c @@ -55,9 +55,21 @@ static void *get_hashentry(struct rhashtable *ht, void *key, bool is_pre_ct_flow(struct flow_cls_offload *flow) { + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); + struct flow_dissector *dissector = rule->match.dissector; struct flow_action_entry *act; + struct flow_match_ct ct; int i; + if (dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_CT)) { + flow_rule_match_ct(rule, &ct); + if (ct.key->ct_state) + return false; + } + + if (flow->common.chain_index) + return false; + flow_action_for_each(i, act, &flow->rule->action) { if (act->id == FLOW_ACTION_CT) { /* The pre_ct rule only have the ct or ct nat action, cannot -- GitLab From 0b8d953cce26287a7f5eb78ff49496e9836c88ed Mon Sep 17 00:00:00 2001 From: Wentao Jia Date: Tue, 14 Mar 2023 08:36:07 +0200 Subject: [PATCH 0497/2078] nfp: flower: refactor function "is_post_ct_flow" 'ct_clear' action only or no ct action is supported for 'post_ct_flow'. But in scenario of multiple ct zones, one non 'ct_clear' ct action or more ct actions, including 'ct_clear action', may be present in one flow rule. If ct state match key is 'ct_established', the flow rule is still expected to be classified as 'post_ct_flow'. Check ct status first in function "is_post_ct_flow" to achieve this. Signed-off-by: Wentao Jia Acked-by: Simon Horman Signed-off-by: Louis Peens Signed-off-by: Jakub Kicinski --- .../ethernet/netronome/nfp/flower/conntrack.c | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c index e0d6c87542729..6b90b922bac0e 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c @@ -94,24 +94,23 @@ bool is_post_ct_flow(struct flow_cls_offload *flow) struct flow_match_ct ct; int i; - /* post ct entry cannot contains any ct action except ct_clear. */ - flow_action_for_each(i, act, &flow->rule->action) { - if (act->id == FLOW_ACTION_CT) { - /* ignore ct clear action. */ - if (act->ct.action == TCA_CT_ACT_CLEAR) { - exist_ct_clear = true; - continue; - } - - return false; - } - } - if (dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_CT)) { flow_rule_match_ct(rule, &ct); if (ct.key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED) return true; } else { + /* post ct entry cannot contains any ct action except ct_clear. */ + flow_action_for_each(i, act, &flow->rule->action) { + if (act->id == FLOW_ACTION_CT) { + /* ignore ct clear action. */ + if (act->ct.action == TCA_CT_ACT_CLEAR) { + exist_ct_clear = true; + continue; + } + + return false; + } + } /* when do nat with ct, the post ct entry ignore the ct status, * will match the nat field(sip/dip) instead. In this situation, * the flow chain index is not zero and contains ct clear action. 
-- GitLab From 3e44d19934b92398785b3ffc2353b9eba264140e Mon Sep 17 00:00:00 2001 From: Wentao Jia Date: Tue, 14 Mar 2023 08:36:08 +0200 Subject: [PATCH 0498/2078] nfp: flower: add goto_chain_index for ct entry The chain_index field has different meanings in a pre ct entry and a post ct entry. In a pre ct entry it means the chain index, but in a post ct entry it means the goto chain index, which is confusing. A chain index and a goto chain index may both be present in one flow rule, and they cannot be distinguished by the single field chain_index; both chain_index and goto_chain_index are required in the follow-up patch to support multiple ct zones. Another field, goto_chain_index, is added to record the goto chain index. If there is no goto action in a post ct entry, goto_chain_index is 0. Signed-off-by: Wentao Jia Acked-by: Simon Horman Signed-off-by: Louis Peens Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/netronome/nfp/flower/conntrack.c | 8 ++++++-- drivers/net/ethernet/netronome/nfp/flower/conntrack.h | 2 ++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c index 6b90b922bac0e..86ea8cbc67a2e 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c @@ -1254,7 +1254,7 @@ static int nfp_ct_do_tc_merge(struct nfp_fl_ct_zone_entry *zt, /* Checks that the chain_index of the filter matches the * chain_index of the GOTO action. */ - if (post_ct_entry->chain_index != pre_ct_entry->chain_index) + if (post_ct_entry->chain_index != pre_ct_entry->goto_chain_index) return -EINVAL; err = nfp_ct_merge_check(pre_ct_entry, post_ct_entry); @@ -1783,7 +1783,8 @@ int nfp_fl_ct_handle_pre_ct(struct nfp_flower_priv *priv, if (IS_ERR(ct_entry)) return PTR_ERR(ct_entry); ct_entry->type = CT_TYPE_PRE_CT; - ct_entry->chain_index = ct_goto->chain_index; + ct_entry->chain_index = flow->common.chain_index; + ct_entry->goto_chain_index = ct_goto->chain_index; list_add(&ct_entry->list_node, &zt->pre_ct_list); zt->pre_ct_count++; @@ -1806,6 +1807,7 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv, struct nfp_fl_ct_zone_entry *zt; bool wildcarded = false; struct flow_match_ct ct; + struct flow_action_entry *ct_goto; flow_rule_match_ct(rule, &ct); if (!ct.mask->ct_zone) { @@ -1830,6 +1832,8 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv, ct_entry->type = CT_TYPE_POST_CT; ct_entry->chain_index = flow->common.chain_index; + ct_goto = get_flow_act(flow->rule, FLOW_ACTION_GOTO); + ct_entry->goto_chain_index = ct_goto ? ct_goto->chain_index : 0; list_add(&ct_entry->list_node, &zt->post_ct_list); zt->post_ct_count++; diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h index 762c0b36e269b..9440ab776ecea 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h @@ -112,6 +112,7 @@ enum nfp_nfp_layer_name { * @cookie: Flow cookie, same as original TC flow, used as key * @list_node: Used by the list * @chain_index: Chain index of the original flow + * @goto_chain_index: goto chain index of the flow * @netdev: netdev structure.
* @type: Type of pre-entry from enum ct_entry_type * @zt: Reference to the zone table this belongs to @@ -125,6 +126,7 @@ struct nfp_fl_ct_flow_entry { unsigned long cookie; struct list_head list_node; u32 chain_index; + u32 goto_chain_index; enum ct_entry_type type; struct net_device *netdev; struct nfp_fl_ct_zone_entry *zt; -- GitLab From 46a83c85b683b647d86e679e2b095494e87f4d7c Mon Sep 17 00:00:00 2001 From: Wentao Jia Date: Tue, 14 Mar 2023 08:36:09 +0200 Subject: [PATCH 0499/2078] nfp: flower: prepare for parameterisation of number of offload rules The fixed number of offload flow rule is only supported scenario of one ct zone, in the scenario of multiple ct zones, dynamic number and more number of offload flow rules are required. In order to support scenario of multiple ct zones, parameter num_rules is added for to offload flow rules Signed-off-by: Wentao Jia Acked-by: Simon Horman Signed-off-by: Louis Peens Signed-off-by: Jakub Kicinski --- .../ethernet/netronome/nfp/flower/conntrack.c | 54 ++++++++++--------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c index 86ea8cbc67a2e..ecffb6b0f3a19 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c @@ -693,34 +693,34 @@ static void nfp_fl_get_csum_flag(struct flow_action_entry *a_in, u8 ip_proto, u3 static int nfp_fl_merge_actions_offload(struct flow_rule **rules, struct nfp_flower_priv *priv, struct net_device *netdev, - struct nfp_fl_payload *flow_pay) + struct nfp_fl_payload *flow_pay, + int num_rules) { enum flow_action_hw_stats tmp_stats = FLOW_ACTION_HW_STATS_DONT_CARE; struct flow_action_entry *a_in; - int i, j, num_actions, id; + int i, j, id, num_actions = 0; struct flow_rule *a_rule; int err = 0, offset = 0; - num_actions = rules[CT_TYPE_PRE_CT]->action.num_entries + - rules[CT_TYPE_NFT]->action.num_entries + - rules[CT_TYPE_POST_CT]->action.num_entries; + for (i = 0; i < num_rules; i++) + num_actions += rules[i]->action.num_entries; /* Add one action to make sure there is enough room to add an checksum action * when do nat. */ - a_rule = flow_rule_alloc(num_actions + 1); + a_rule = flow_rule_alloc(num_actions + (num_rules / 2)); if (!a_rule) return -ENOMEM; - /* Actions need a BASIC dissector. */ - a_rule->match = rules[CT_TYPE_PRE_CT]->match; /* post_ct entry have one action at least. */ - if (rules[CT_TYPE_POST_CT]->action.num_entries != 0) { - tmp_stats = rules[CT_TYPE_POST_CT]->action.entries[0].hw_stats; - } + if (rules[num_rules - 1]->action.num_entries != 0) + tmp_stats = rules[num_rules - 1]->action.entries[0].hw_stats; + + /* Actions need a BASIC dissector. */ + a_rule->match = rules[0]->match; /* Copy actions */ - for (j = 0; j < _CT_TYPE_MAX; j++) { + for (j = 0; j < num_rules; j++) { u32 csum_updated = 0; u8 ip_proto = 0; @@ -758,8 +758,9 @@ static int nfp_fl_merge_actions_offload(struct flow_rule **rules, /* nft entry is generated by tc ct, which mangle action do not care * the stats, inherit the post entry stats to meet the * flow_action_hw_stats_check. + * nft entry flow rules are at odd array index. 
*/ - if (j == CT_TYPE_NFT) { + if (j & 0x01) { if (a_in->hw_stats == FLOW_ACTION_HW_STATS_DONT_CARE) a_in->hw_stats = tmp_stats; nfp_fl_get_csum_flag(a_in, ip_proto, &csum_updated); @@ -801,6 +802,7 @@ static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry) struct nfp_fl_payload *flow_pay; struct flow_rule *rules[_CT_TYPE_MAX]; + int num_rules = _CT_TYPE_MAX; u8 *key, *msk, *kdata, *mdata; struct nfp_port *port = NULL; struct net_device *netdev; @@ -820,7 +822,7 @@ static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry) memset(&key_map, 0, sizeof(key_map)); /* Calculate the resultant key layer and size for offload */ - for (i = 0; i < _CT_TYPE_MAX; i++) { + for (i = 0; i < num_rules; i++) { err = nfp_flower_calculate_key_layers(priv->app, m_entry->netdev, &tmp_layer, rules[i], @@ -886,7 +888,7 @@ static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry) * that the layer is not present. */ if (!qinq_sup) { - for (i = 0; i < _CT_TYPE_MAX; i++) { + for (i = 0; i < num_rules; i++) { offset = key_map[FLOW_PAY_META_TCI]; key = kdata + offset; msk = mdata + offset; @@ -900,7 +902,7 @@ static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry) offset = key_map[FLOW_PAY_MAC_MPLS]; key = kdata + offset; msk = mdata + offset; - for (i = 0; i < _CT_TYPE_MAX; i++) { + for (i = 0; i < num_rules; i++) { nfp_flower_compile_mac((struct nfp_flower_mac_mpls *)key, (struct nfp_flower_mac_mpls *)msk, rules[i]); @@ -916,7 +918,7 @@ static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry) offset = key_map[FLOW_PAY_IPV4]; key = kdata + offset; msk = mdata + offset; - for (i = 0; i < _CT_TYPE_MAX; i++) { + for (i = 0; i < num_rules; i++) { nfp_flower_compile_ipv4((struct nfp_flower_ipv4 *)key, (struct nfp_flower_ipv4 *)msk, rules[i]); @@ -927,7 +929,7 @@ static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry) offset = key_map[FLOW_PAY_IPV6]; key = kdata + offset; msk = mdata + offset; - for (i = 0; i < _CT_TYPE_MAX; i++) { + for (i = 0; i < num_rules; i++) { nfp_flower_compile_ipv6((struct nfp_flower_ipv6 *)key, (struct nfp_flower_ipv6 *)msk, rules[i]); @@ -938,7 +940,7 @@ static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry) offset = key_map[FLOW_PAY_L4]; key = kdata + offset; msk = mdata + offset; - for (i = 0; i < _CT_TYPE_MAX; i++) { + for (i = 0; i < num_rules; i++) { nfp_flower_compile_tport((struct nfp_flower_tp_ports *)key, (struct nfp_flower_tp_ports *)msk, rules[i]); @@ -949,7 +951,7 @@ static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry) offset = key_map[FLOW_PAY_QINQ]; key = kdata + offset; msk = mdata + offset; - for (i = 0; i < _CT_TYPE_MAX; i++) { + for (i = 0; i < num_rules; i++) { nfp_flower_compile_vlan((struct nfp_flower_vlan *)key, (struct nfp_flower_vlan *)msk, rules[i]); @@ -965,7 +967,7 @@ static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry) struct nfp_ipv6_addr_entry *entry; struct in6_addr *dst; - for (i = 0; i < _CT_TYPE_MAX; i++) { + for (i = 0; i < num_rules; i++) { nfp_flower_compile_ipv6_gre_tun((void *)key, (void *)msk, rules[i]); } @@ -982,7 +984,7 @@ static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry) } else { __be32 dst; - for (i = 0; i < _CT_TYPE_MAX; i++) { + for (i = 0; i < num_rules; i++) { nfp_flower_compile_ipv4_gre_tun((void *)key, (void *)msk, rules[i]); } @@ -1006,7 +1008,7 @@ static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry) struct nfp_ipv6_addr_entry *entry; struct in6_addr *dst; - for (i = 0; i < 
_CT_TYPE_MAX; i++) { + for (i = 0; i < num_rules; i++) { nfp_flower_compile_ipv6_udp_tun((void *)key, (void *)msk, rules[i]); } @@ -1023,7 +1025,7 @@ static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry) } else { __be32 dst; - for (i = 0; i < _CT_TYPE_MAX; i++) { + for (i = 0; i < num_rules; i++) { nfp_flower_compile_ipv4_udp_tun((void *)key, (void *)msk, rules[i]); } @@ -1040,13 +1042,13 @@ static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry) offset = key_map[FLOW_PAY_GENEVE_OPT]; key = kdata + offset; msk = mdata + offset; - for (i = 0; i < _CT_TYPE_MAX; i++) + for (i = 0; i < num_rules; i++) nfp_flower_compile_geneve_opt(key, msk, rules[i]); } } /* Merge actions into flow_pay */ - err = nfp_fl_merge_actions_offload(rules, priv, netdev, flow_pay); + err = nfp_fl_merge_actions_offload(rules, priv, netdev, flow_pay, num_rules); if (err) goto ct_offload_err; -- GitLab From a87ceb3d42afebd86191054314ef1a2f98c4556b Mon Sep 17 00:00:00 2001 From: Wentao Jia Date: Tue, 14 Mar 2023 08:36:10 +0200 Subject: [PATCH 0500/2078] nfp: flower: offload tc flows of multiple conntrack zones If goto_chain action present in the post ct flow rule, merge flow rules in this ct-zone, create a new pre_ct entry as the pre ct flow rule of next ct-zone, but do not offload merged flow rules to firmware. Repeat the process in the next ct-zone until no goto_chain action present in the post ct flow rule in a certain ct-zone, merged all the flow rules. Offload to firmware finally. Signed-off-by: Wentao Jia Acked-by: Simon Horman Signed-off-by: Louis Peens Signed-off-by: Jakub Kicinski --- .../ethernet/netronome/nfp/flower/conntrack.c | 145 +++++++++++++++--- .../ethernet/netronome/nfp/flower/conntrack.h | 30 +++- .../ethernet/netronome/nfp/flower/offload.c | 2 +- 3 files changed, 154 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c index ecffb6b0f3a19..73032173ac4eb 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c @@ -522,6 +522,21 @@ static int nfp_ct_check_vlan_merge(struct flow_action_entry *a_in, return 0; } +/* Extra check for multiple ct-zones merge + * currently surpport nft entries merge check in different zones + */ +static int nfp_ct_merge_extra_check(struct nfp_fl_ct_flow_entry *nft_entry, + struct nfp_fl_ct_tc_merge *tc_m_entry) +{ + struct nfp_fl_nft_tc_merge *prev_nft_m_entry; + struct nfp_fl_ct_flow_entry *pre_ct_entry; + + pre_ct_entry = tc_m_entry->pre_ct_parent; + prev_nft_m_entry = pre_ct_entry->prev_m_entries[pre_ct_entry->num_prev_m_entries - 1]; + + return nfp_ct_merge_check(prev_nft_m_entry->nft_parent, nft_entry); +} + static int nfp_ct_merge_act_check(struct nfp_fl_ct_flow_entry *pre_ct_entry, struct nfp_fl_ct_flow_entry *post_ct_entry, struct nfp_fl_ct_flow_entry *nft_entry) @@ -796,27 +811,34 @@ static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry) { enum nfp_flower_tun_type tun_type = NFP_FL_TUNNEL_NONE; struct nfp_fl_ct_zone_entry *zt = m_entry->zt; + struct flow_rule *rules[NFP_MAX_ENTRY_RULES]; + struct nfp_fl_ct_flow_entry *pre_ct_entry; struct nfp_fl_key_ls key_layer, tmp_layer; struct nfp_flower_priv *priv = zt->priv; u16 key_map[_FLOW_PAY_LAYERS_MAX]; struct nfp_fl_payload *flow_pay; - - struct flow_rule *rules[_CT_TYPE_MAX]; - int num_rules = _CT_TYPE_MAX; u8 *key, *msk, *kdata, *mdata; struct nfp_port *port = NULL; + int num_rules, err, i, j = 0; struct 
net_device *netdev; bool qinq_sup; u32 port_id; u16 offset; - int i, err; netdev = m_entry->netdev; qinq_sup = !!(priv->flower_ext_feats & NFP_FL_FEATS_VLAN_QINQ); - rules[CT_TYPE_PRE_CT] = m_entry->tc_m_parent->pre_ct_parent->rule; - rules[CT_TYPE_NFT] = m_entry->nft_parent->rule; - rules[CT_TYPE_POST_CT] = m_entry->tc_m_parent->post_ct_parent->rule; + pre_ct_entry = m_entry->tc_m_parent->pre_ct_parent; + num_rules = pre_ct_entry->num_prev_m_entries * 2 + _CT_TYPE_MAX; + + for (i = 0; i < pre_ct_entry->num_prev_m_entries; i++) { + rules[j++] = pre_ct_entry->prev_m_entries[i]->tc_m_parent->pre_ct_parent->rule; + rules[j++] = pre_ct_entry->prev_m_entries[i]->nft_parent->rule; + } + + rules[j++] = m_entry->tc_m_parent->pre_ct_parent->rule; + rules[j++] = m_entry->nft_parent->rule; + rules[j++] = m_entry->tc_m_parent->post_ct_parent->rule; memset(&key_layer, 0, sizeof(struct nfp_fl_key_ls)); memset(&key_map, 0, sizeof(key_map)); @@ -1181,6 +1203,12 @@ static int nfp_ct_do_nft_merge(struct nfp_fl_ct_zone_entry *zt, if (err) return err; + if (pre_ct_entry->num_prev_m_entries > 0) { + err = nfp_ct_merge_extra_check(nft_entry, tc_m_entry); + if (err) + return err; + } + /* Combine tc_merge and nft cookies for this cookie. */ new_cookie[0] = tc_m_entry->cookie[0]; new_cookie[1] = tc_m_entry->cookie[1]; @@ -1211,11 +1239,6 @@ static int nfp_ct_do_nft_merge(struct nfp_fl_ct_zone_entry *zt, list_add(&nft_m_entry->tc_merge_list, &tc_m_entry->children); list_add(&nft_m_entry->nft_flow_list, &nft_entry->children); - /* Generate offload structure and send to nfp */ - err = nfp_fl_ct_add_offload(nft_m_entry); - if (err) - goto err_nft_ct_offload; - err = rhashtable_insert_fast(&zt->nft_merge_tb, &nft_m_entry->hash_node, nfp_nft_ct_merge_params); if (err) @@ -1223,12 +1246,20 @@ static int nfp_ct_do_nft_merge(struct nfp_fl_ct_zone_entry *zt, zt->nft_merge_count++; + if (post_ct_entry->goto_chain_index > 0) + return nfp_fl_create_new_pre_ct(nft_m_entry); + + /* Generate offload structure and send to nfp */ + err = nfp_fl_ct_add_offload(nft_m_entry); + if (err) + goto err_nft_ct_offload; + return err; -err_nft_ct_merge_insert: +err_nft_ct_offload: nfp_fl_ct_del_offload(zt->priv->app, nft_m_entry->tc_flower_cookie, nft_m_entry->netdev); -err_nft_ct_offload: +err_nft_ct_merge_insert: list_del(&nft_m_entry->tc_merge_list); list_del(&nft_m_entry->nft_flow_list); kfree(nft_m_entry); @@ -1474,7 +1505,7 @@ nfp_fl_ct_flow_entry *nfp_fl_ct_add_flow(struct nfp_fl_ct_zone_entry *zt, entry->zt = zt; entry->netdev = netdev; - entry->cookie = flow->cookie; + entry->cookie = flow->cookie > 0 ? 
flow->cookie : (unsigned long)entry; entry->chain_index = flow->common.chain_index; entry->tun_offset = NFP_FL_CT_NO_TUN; @@ -1514,6 +1545,9 @@ nfp_fl_ct_flow_entry *nfp_fl_ct_add_flow(struct nfp_fl_ct_zone_entry *zt, INIT_LIST_HEAD(&entry->children); + if (flow->cookie == 0) + return entry; + /* Now add a ct map entry to flower-priv */ map = get_hashentry(&zt->priv->ct_map_table, &flow->cookie, nfp_ct_map_params, sizeof(*map)); @@ -1572,6 +1606,14 @@ static void cleanup_nft_merge_entry(struct nfp_fl_nft_tc_merge *m_entry) list_del(&m_entry->tc_merge_list); list_del(&m_entry->nft_flow_list); + if (m_entry->next_pre_ct_entry) { + struct nfp_fl_ct_map_entry pre_ct_map_ent; + + pre_ct_map_ent.ct_entry = m_entry->next_pre_ct_entry; + pre_ct_map_ent.cookie = 0; + nfp_fl_ct_del_flow(&pre_ct_map_ent); + } + kfree(m_entry); } @@ -1742,7 +1784,8 @@ nfp_ct_merge_nft_with_tc(struct nfp_fl_ct_flow_entry *nft_entry, int nfp_fl_ct_handle_pre_ct(struct nfp_flower_priv *priv, struct net_device *netdev, struct flow_cls_offload *flow, - struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack, + struct nfp_fl_nft_tc_merge *m_entry) { struct flow_action_entry *ct_act, *ct_goto; struct nfp_fl_ct_flow_entry *ct_entry; @@ -1787,6 +1830,20 @@ int nfp_fl_ct_handle_pre_ct(struct nfp_flower_priv *priv, ct_entry->type = CT_TYPE_PRE_CT; ct_entry->chain_index = flow->common.chain_index; ct_entry->goto_chain_index = ct_goto->chain_index; + + if (m_entry) { + struct nfp_fl_ct_flow_entry *pre_ct_entry; + int i; + + pre_ct_entry = m_entry->tc_m_parent->pre_ct_parent; + for (i = 0; i < pre_ct_entry->num_prev_m_entries; i++) + ct_entry->prev_m_entries[i] = pre_ct_entry->prev_m_entries[i]; + ct_entry->prev_m_entries[i++] = m_entry; + ct_entry->num_prev_m_entries = i; + + m_entry->next_pre_ct_entry = ct_entry; + } + list_add(&ct_entry->list_node, &zt->pre_ct_list); zt->pre_ct_count++; @@ -1864,6 +1921,28 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv, return 0; } +int nfp_fl_create_new_pre_ct(struct nfp_fl_nft_tc_merge *m_entry) +{ + struct nfp_fl_ct_flow_entry *pre_ct_entry, *post_ct_entry; + struct flow_cls_offload new_pre_ct_flow; + int err; + + pre_ct_entry = m_entry->tc_m_parent->pre_ct_parent; + if (pre_ct_entry->num_prev_m_entries >= NFP_MAX_RECIRC_CT_ZONES - 1) + return -1; + + post_ct_entry = m_entry->tc_m_parent->post_ct_parent; + memset(&new_pre_ct_flow, 0, sizeof(struct flow_cls_offload)); + new_pre_ct_flow.rule = post_ct_entry->rule; + new_pre_ct_flow.common.chain_index = post_ct_entry->chain_index; + + err = nfp_fl_ct_handle_pre_ct(pre_ct_entry->zt->priv, + pre_ct_entry->netdev, + &new_pre_ct_flow, NULL, + m_entry); + return err; +} + static void nfp_fl_ct_sub_stats(struct nfp_fl_nft_tc_merge *nft_merge, enum ct_entry_type type, u64 *m_pkts, @@ -1909,6 +1988,32 @@ nfp_fl_ct_sub_stats(struct nfp_fl_nft_tc_merge *nft_merge, 0, priv->stats[ctx_id].used, FLOW_ACTION_HW_STATS_DELAYED); } + + /* Update previous pre_ct/post_ct/nft flow stats */ + if (nft_merge->tc_m_parent->pre_ct_parent->num_prev_m_entries > 0) { + struct nfp_fl_nft_tc_merge *tmp_nft_merge; + int i; + + for (i = 0; i < nft_merge->tc_m_parent->pre_ct_parent->num_prev_m_entries; i++) { + tmp_nft_merge = nft_merge->tc_m_parent->pre_ct_parent->prev_m_entries[i]; + flow_stats_update(&tmp_nft_merge->tc_m_parent->pre_ct_parent->stats, + priv->stats[ctx_id].bytes, + priv->stats[ctx_id].pkts, + 0, priv->stats[ctx_id].used, + FLOW_ACTION_HW_STATS_DELAYED); + flow_stats_update(&tmp_nft_merge->tc_m_parent->post_ct_parent->stats, + 
priv->stats[ctx_id].bytes, + priv->stats[ctx_id].pkts, + 0, priv->stats[ctx_id].used, + FLOW_ACTION_HW_STATS_DELAYED); + flow_stats_update(&tmp_nft_merge->nft_parent->stats, + priv->stats[ctx_id].bytes, + priv->stats[ctx_id].pkts, + 0, priv->stats[ctx_id].used, + FLOW_ACTION_HW_STATS_DELAYED); + } + } + /* Reset stats from the nfp */ priv->stats[ctx_id].pkts = 0; priv->stats[ctx_id].bytes = 0; @@ -2113,10 +2218,12 @@ int nfp_fl_ct_del_flow(struct nfp_fl_ct_map_entry *ct_map_ent) switch (ct_entry->type) { case CT_TYPE_PRE_CT: zt->pre_ct_count--; - rhashtable_remove_fast(m_table, &ct_map_ent->hash_node, - nfp_ct_map_params); + if (ct_map_ent->cookie > 0) + rhashtable_remove_fast(m_table, &ct_map_ent->hash_node, + nfp_ct_map_params); nfp_fl_ct_clean_flow_entry(ct_entry); - kfree(ct_map_ent); + if (ct_map_ent->cookie > 0) + kfree(ct_map_ent); if (!zt->pre_ct_count) { zt->nft = NULL; diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h index 9440ab776ecea..c4ec783580332 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h @@ -86,6 +86,9 @@ enum ct_entry_type { _CT_TYPE_MAX, }; +#define NFP_MAX_RECIRC_CT_ZONES 4 +#define NFP_MAX_ENTRY_RULES (NFP_MAX_RECIRC_CT_ZONES * 2 + 1) + enum nfp_nfp_layer_name { FLOW_PAY_META_TCI = 0, FLOW_PAY_INPORT, @@ -114,27 +117,31 @@ enum nfp_nfp_layer_name { * @chain_index: Chain index of the original flow * @goto_chain_index: goto chain index of the flow * @netdev: netdev structure. - * @type: Type of pre-entry from enum ct_entry_type * @zt: Reference to the zone table this belongs to * @children: List of tc_merge flows this flow forms part of * @rule: Reference to the original TC flow rule * @stats: Used to cache stats for updating + * @prev_m_entries: Array of all previous nft_tc_merge entries + * @num_prev_m_entries: The number of all previous nft_tc_merge entries * @tun_offset: Used to indicate tunnel action offset in action list * @flags: Used to indicate flow flag like NAT which used by merge. + * @type: Type of ct-entry from enum ct_entry_type */ struct nfp_fl_ct_flow_entry { unsigned long cookie; struct list_head list_node; u32 chain_index; u32 goto_chain_index; - enum ct_entry_type type; struct net_device *netdev; struct nfp_fl_ct_zone_entry *zt; struct list_head children; struct flow_rule *rule; struct flow_stats stats; + struct nfp_fl_nft_tc_merge *prev_m_entries[NFP_MAX_RECIRC_CT_ZONES - 1]; + u8 num_prev_m_entries; u8 tun_offset; // Set to NFP_FL_CT_NO_TUN if no tun u8 flags; + u8 type; }; /** @@ -171,6 +178,7 @@ struct nfp_fl_ct_tc_merge { * @nft_parent: The nft_entry parent * @tc_flower_cookie: The cookie of the flow offloaded to the nfp * @flow_pay: Reference to the offloaded flow struct + * @next_pre_ct_entry: Reference to the next ct zone pre ct entry */ struct nfp_fl_nft_tc_merge { struct net_device *netdev; @@ -183,6 +191,7 @@ struct nfp_fl_nft_tc_merge { struct nfp_fl_ct_flow_entry *nft_parent; unsigned long tc_flower_cookie; struct nfp_fl_payload *flow_pay; + struct nfp_fl_ct_flow_entry *next_pre_ct_entry; }; /** @@ -206,6 +215,7 @@ bool is_post_ct_flow(struct flow_cls_offload *flow); * @netdev: netdev structure. * @flow: TC flower classifier offload structure. * @extack: Extack pointer for errors + * @m_entry:previous nfp_fl_nft_tc_merge entry * * Adds a new entry to the relevant zone table and tries to * merge with other +trk+est entries and offload if possible. 
@@ -215,7 +225,8 @@ bool is_post_ct_flow(struct flow_cls_offload *flow); int nfp_fl_ct_handle_pre_ct(struct nfp_flower_priv *priv, struct net_device *netdev, struct flow_cls_offload *flow, - struct netlink_ext_ack *extack); + struct netlink_ext_ack *extack, + struct nfp_fl_nft_tc_merge *m_entry); /** * nfp_fl_ct_handle_post_ct() - Handles +trk+est conntrack rules * @priv: Pointer to app priv @@ -233,6 +244,19 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv, struct flow_cls_offload *flow, struct netlink_ext_ack *extack); +/** + * nfp_fl_create_new_pre_ct() - create next ct_zone -trk conntrack rules + * @m_entry:previous nfp_fl_nft_tc_merge entry + * + * Create a new pre_ct entry from previous nfp_fl_nft_tc_merge entry + * to the next relevant zone table. Try to merge with other +trk+est + * entries and offload if possible. The created new pre_ct entry is + * linked to the previous nfp_fl_nft_tc_merge entry. + * + * Return: negative value on error, 0 if configured successfully. + */ +int nfp_fl_create_new_pre_ct(struct nfp_fl_nft_tc_merge *m_entry); + /** * nfp_fl_ct_clean_flow_entry() - Free a nfp_fl_ct_flow_entry * @entry: Flow entry to cleanup diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 8593cafa63683..18328eb7f5c33 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -1344,7 +1344,7 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev, port = nfp_port_from_netdev(netdev); if (is_pre_ct_flow(flow)) - return nfp_fl_ct_handle_pre_ct(priv, netdev, flow, extack); + return nfp_fl_ct_handle_pre_ct(priv, netdev, flow, extack, NULL); if (is_post_ct_flow(flow)) return nfp_fl_ct_handle_post_ct(priv, netdev, flow, extack); -- GitLab From 9bdf4489a3950335af088604876fdc3209a9dd8e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 14 Mar 2023 08:23:15 +0000 Subject: [PATCH 0501/2078] net: phy: micrel: Fix spelling mistake "minimim" -> "minimum" There is a spelling mistake in a pr_warn_ratelimited message. Fix it. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20230314082315.26532-1-colin.i.king@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/micrel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 0cf0ef3a96a36..e26c6723caa4d 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -4106,7 +4106,7 @@ static int lan8841_ptp_perout(struct ptp_clock_info *ptp, period_nsec = timespec64_to_ns(&ts_period); if (period_nsec < 200) { - pr_warn_ratelimited("%s: perout period too small, minimim is 200 nsec\n", + pr_warn_ratelimited("%s: perout period too small, minimum is 200 nsec\n", phydev_name(phydev)); return -EOPNOTSUPP; } -- GitLab From 499183cc3b52613f06cf4ce70809546971c96ed8 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 15 Mar 2023 20:28:29 -0700 Subject: [PATCH 0502/2078] wifi: iwlwifi: Avoid disabling GCC specific flag with clang Clang errors: drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c:15:32: error: unknown warning group '-Wsuggest-attribute=format', ignored [-Werror,-Wunknown-warning-option] #pragma GCC diagnostic ignored "-Wsuggest-attribute=format" ^ 1 error generated. The warning being disabled by this pragma is GCC specific. Guard its use with CONFIG_CC_IS_GCC so that it is not used with clang to clear up the error. 
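The same guard pattern can be shown in a standalone file; here the compiler-provided __GNUC__/__clang__ macros stand in for the kernel's CONFIG_CC_IS_GCC, which only exists inside a kernel build:

#include <stdio.h>

/* -Wsuggest-attribute=format is a GCC warning group; clang would emit
 * -Wunknown-warning-option (an error under -Werror) if it saw the pragma,
 * so only emit it when building with GCC.
 */
#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic ignored "-Wsuggest-attribute=format"
#endif

static void log_msg(const char *fmt, ...)
{
	(void)fmt;	/* printf-like helper the warning group would flag */
}

int main(void)
{
	log_msg("event %d\n", 42);
	puts("builds warning-free with both gcc and clang");
	return 0;
}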
Fixes: 4eca8cbf7ba8 ("wifi: iwlwifi: suppress printf warnings in tracing") Link: https://github.com/ClangBuiltLinux/linux/issues/1818 Signed-off-by: Nathan Chancellor Link: https://lore.kernel.org/r/20230315-iwlwifi-fix-pragma-v1-1-ad23f92c4739@kernel.org Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c b/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c index c190ec5effa11..e46639b097f46 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c @@ -12,7 +12,9 @@ #include "iwl-trans.h" #define CREATE_TRACE_POINTS +#ifdef CONFIG_CC_IS_GCC #pragma GCC diagnostic ignored "-Wsuggest-attribute=format" +#endif #include "iwl-devtrace.h" EXPORT_TRACEPOINT_SYMBOL(iwlwifi_dev_ucode_event); -- GitLab From ed01385c0d78a025bdc72128b7aa7c3309cd5852 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 15 Mar 2023 17:07:25 -0700 Subject: [PATCH 0503/2078] selftests/bpf: Use ASSERT_EQ instead ASSERT_OK for testing memcmp result In tcp_hdr_options test, it ensures the received tcp hdr option and the sk local storage have the expected values. It uses memcmp to check that. Testing the memcmp result with ASSERT_OK is confusing because ASSERT_OK will print out the errno which is not set. This patch uses ASSERT_EQ to check for 0 instead. Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20230316000726.1016773-1-martin.lau@linux.dev --- tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c index 5cf85d0f98277..13bcaeb028b84 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c @@ -151,7 +151,7 @@ static int check_hdr_opt(const struct bpf_test_option *exp, const struct bpf_test_option *act, const char *hdr_desc) { - if (!ASSERT_OK(memcmp(exp, act, sizeof(*exp)), hdr_desc)) { + if (!ASSERT_EQ(memcmp(exp, act, sizeof(*exp)), 0, hdr_desc)) { print_option(exp, "expected: "); print_option(act, " actual: "); return -1; @@ -169,7 +169,7 @@ static int check_hdr_stg(const struct hdr_stg *exp, int fd, "map_lookup(hdr_stg_map_fd)")) return -1; - if (!ASSERT_OK(memcmp(exp, &act, sizeof(*exp)), stg_desc)) { + if (!ASSERT_EQ(memcmp(exp, &act, sizeof(*exp)), 0, stg_desc)) { print_hdr_stg(exp, "expected: "); print_hdr_stg(&act, " actual: "); return -1; -- GitLab From 226efec2b0efad60d4a6c4b2c3a8710dafc4dc21 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 15 Mar 2023 17:07:26 -0700 Subject: [PATCH 0504/2078] selftests/bpf: Fix a fd leak in an error path in network_helpers.c In __start_server, it leaks a fd when setsockopt(SO_REUSEPORT) fails. This patch fixes it. 
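The underlying pattern is the usual goto-based cleanup: once the socket exists, every later failure has to branch to a common close label instead of returning directly. A standalone sketch of that pattern (not the selftest helper itself; it uses SO_REUSEADDR and the loopback address purely for simplicity):

#include <stdio.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <netinet/in.h>

static int start_server(unsigned short port)
{
	struct sockaddr_in addr = {
		.sin_family = AF_INET,
		.sin_port = htons(port),
		.sin_addr = { .s_addr = htonl(INADDR_LOOPBACK) },
	};
	int on = 1;
	int fd;

	fd = socket(AF_INET, SOCK_STREAM, 0);
	if (fd < 0)
		return -1;

	/* every failure past this point must close fd, or it leaks */
	if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)))
		goto error_close;

	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		goto error_close;

	if (listen(fd, 1) < 0)
		goto error_close;

	return fd;

error_close:
	close(fd);
	return -1;
}

int main(void)
{
	int fd = start_server(0);	/* port 0: let the kernel pick one */

	if (fd >= 0) {
		printf("listening on fd %d\n", fd);
		close(fd);
	}
	return 0;
}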
Fixes: eed92afdd14c ("bpf: selftest: Test batching and bpf_(get|set)sockopt in bpf tcp iter") Reported-by: Andrii Nakryiko Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20230316000726.1016773-2-martin.lau@linux.dev --- tools/testing/selftests/bpf/network_helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 01de33191226b..596caa1765820 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -95,7 +95,7 @@ static int __start_server(int type, int protocol, const struct sockaddr *addr, if (reuseport && setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on))) { log_err("Failed to set SO_REUSEPORT"); - return -1; + goto error_close; } if (bind(fd, addr, addrlen) < 0) { -- GitLab From 6cb9430be1471d631e1b6b138e6d26657a9caa81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20M=C3=BCller?= Date: Wed, 15 Mar 2023 17:15:50 +0000 Subject: [PATCH 0505/2078] libbpf: Ignore warnings about "inefficient alignment" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some consumers of libbpf compile the code base with different warnings enabled. In a report for perf, for example, -Wpacked was set which caused warnings about "inefficient alignment" to be emitted on a subset of supported architectures. With this change we silence specifically those warnings, as we intentionally worked with packed structs. This is a similar resolution as in b2f10cd4e805 ("perf cpumap: Fix alignment for masks in event encoding"). Fixes: 1eebcb60633f ("libbpf: Implement basic zip archive parsing support") Reported-by: Linux Kernel Functional Testing Signed-off-by: Daniel Müller Signed-off-by: Daniel Borkmann Cc: Ian Rogers Link: https://lore.kernel.org/bpf/CA+G9fYtBnwxAWXi2+GyNByApxnf_DtP1-6+_zOKAdJKnJBexjg@mail.gmail.com/ Link: https://lore.kernel.org/bpf/20230315171550.1551603-1-deso@posteo.net --- tools/lib/bpf/zip.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/lib/bpf/zip.c b/tools/lib/bpf/zip.c index f561aa07438f5..3f26d629b2b49 100644 --- a/tools/lib/bpf/zip.c +++ b/tools/lib/bpf/zip.c @@ -16,6 +16,10 @@ #include "libbpf_internal.h" #include "zip.h" +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpacked" +#pragma GCC diagnostic ignored "-Wattributes" + /* Specification of ZIP file format can be found here: * https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT * For a high level overview of the structure of a ZIP file see @@ -119,6 +123,8 @@ struct local_file_header { __u16 extra_field_length; } __attribute__((packed)); +#pragma GCC diagnostic pop + struct zip_archive { void *data; __u32 size; -- GitLab From 0ba13995be9b416ea1d3daaf3ba871a67f45899b Mon Sep 17 00:00:00 2001 From: Xu Liang Date: Wed, 15 Mar 2023 00:30:23 +0800 Subject: [PATCH 0506/2078] net: phy: mxl-gpy: enhance delay time required by loopback disable function GPY2xx devices need 3 seconds to fully switch out of loopback mode before it can safely re-enter loopback mode. Implement timeout mechanism to guarantee 3 seconds waited before re-enter loopback mode. Signed-off-by: Xu Liang Signed-off-by: David S. 
Miller --- drivers/net/phy/mxl-gpy.c | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/drivers/net/phy/mxl-gpy.c b/drivers/net/phy/mxl-gpy.c index e5972b4ef6e8f..8e6bb97b5f85c 100644 --- a/drivers/net/phy/mxl-gpy.c +++ b/drivers/net/phy/mxl-gpy.c @@ -107,6 +107,13 @@ struct gpy_priv { u8 fw_major; u8 fw_minor; + + /* It takes 3 seconds to fully switch out of loopback mode before + * it can safely re-enter loopback mode. Record the time when + * loopback is disabled. Check and wait if necessary before loopback + * is enabled. + */ + u64 lb_dis_to; }; static const struct { @@ -769,18 +776,34 @@ static void gpy_get_wol(struct phy_device *phydev, static int gpy_loopback(struct phy_device *phydev, bool enable) { + struct gpy_priv *priv = phydev->priv; + u16 set = 0; int ret; - ret = phy_modify(phydev, MII_BMCR, BMCR_LOOPBACK, - enable ? BMCR_LOOPBACK : 0); - if (!ret) { - /* It takes some time for PHY device to switch - * into/out-of loopback mode. + if (enable) { + u64 now = get_jiffies_64(); + + /* wait until 3 seconds from last disable */ + if (time_before64(now, priv->lb_dis_to)) + msleep(jiffies64_to_msecs(priv->lb_dis_to - now)); + + set = BMCR_LOOPBACK; + } + + ret = phy_modify(phydev, MII_BMCR, BMCR_LOOPBACK, set); + if (ret <= 0) + return ret; + + if (enable) { + /* It takes some time for PHY device to switch into + * loopback mode. */ msleep(100); + } else { + priv->lb_dis_to = get_jiffies_64() + HZ * 3; } - return ret; + return 0; } static int gpy115_loopback(struct phy_device *phydev, bool enable) -- GitLab From 053fdaa841bd1af9fe9c2c30bba81119059aac95 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 14 Mar 2023 15:13:08 -0500 Subject: [PATCH 0507/2078] nfc: mrvl: Move platform_data struct into driver There are no users of nfcmrvl platform_data struct outside of the driver and none will be added, so move it into the driver. Signed-off-by: Rob Herring Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/nfc/nfcmrvl/nfcmrvl.h | 30 +++++++++++++++-- include/linux/platform_data/nfcmrvl.h | 48 --------------------------- 2 files changed, 28 insertions(+), 50 deletions(-) delete mode 100644 include/linux/platform_data/nfcmrvl.h diff --git a/drivers/nfc/nfcmrvl/nfcmrvl.h b/drivers/nfc/nfcmrvl/nfcmrvl.h index 165bd0a95190b..0f22b3233f733 100644 --- a/drivers/nfc/nfcmrvl/nfcmrvl.h +++ b/drivers/nfc/nfcmrvl/nfcmrvl.h @@ -8,8 +8,6 @@ #ifndef _NFCMRVL_H_ #define _NFCMRVL_H_ -#include - #include "fw_dnld.h" /* Define private flags: */ @@ -50,6 +48,34 @@ enum nfcmrvl_phy { NFCMRVL_PHY_SPI = 3, }; +struct nfcmrvl_platform_data { + /* + * Generic + */ + + /* GPIO that is wired to RESET_N signal */ + int reset_n_io; + /* Tell if transport is muxed in HCI one */ + unsigned int hci_muxed; + + /* + * UART specific + */ + + /* Tell if UART needs flow control at init */ + unsigned int flow_control; + /* Tell if firmware supports break control for power management */ + unsigned int break_control; + + + /* + * I2C specific + */ + + unsigned int irq; + unsigned int irq_polarity; +}; + struct nfcmrvl_private { unsigned long flags; diff --git a/include/linux/platform_data/nfcmrvl.h b/include/linux/platform_data/nfcmrvl.h deleted file mode 100644 index 9e75ac8d19be3..0000000000000 --- a/include/linux/platform_data/nfcmrvl.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (C) 2015, Marvell International Ltd. - * - * This software file (the "File") is distributed by Marvell International - * Ltd. 
under the terms of the GNU General Public License Version 2, June 1991 - * (the "License"). You may use, redistribute and/or modify this File in - * accordance with the terms and conditions of the License, a copy of which - * is available on the worldwide web at - * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt. - * - * THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE - * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE - * ARE EXPRESSLY DISCLAIMED. The License provides additional details about - * this warranty disclaimer. - */ - -#ifndef _NFCMRVL_PTF_H_ -#define _NFCMRVL_PTF_H_ - -struct nfcmrvl_platform_data { - /* - * Generic - */ - - /* GPIO that is wired to RESET_N signal */ - int reset_n_io; - /* Tell if transport is muxed in HCI one */ - unsigned int hci_muxed; - - /* - * UART specific - */ - - /* Tell if UART needs flow control at init */ - unsigned int flow_control; - /* Tell if firmware supports break control for power management */ - unsigned int break_control; - - - /* - * I2C specific - */ - - unsigned int irq; - unsigned int irq_polarity; -}; - -#endif /* _NFCMRVL_PTF_H_ */ -- GitLab From cc6d85c1cb5ccc04c594392339c81227abd1667d Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 14 Mar 2023 15:13:09 -0500 Subject: [PATCH 0508/2078] nfc: mrvl: Use of_property_read_bool() for boolean properties It is preferred to use typed property access functions (i.e. of_property_read_ functions) rather than low-level of_get_property/of_find_property functions for reading properties. Convert reading boolean properties to of_property_read_bool(). Reviewed-by: Simon Horman Reviewed-by: Krzysztof Kozlowski Signed-off-by: Rob Herring Signed-off-by: David S. Miller --- drivers/nfc/nfcmrvl/i2c.c | 2 +- drivers/nfc/nfcmrvl/main.c | 6 +----- drivers/nfc/nfcmrvl/nfcmrvl.h | 6 +++--- drivers/nfc/nfcmrvl/uart.c | 11 ++--------- 4 files changed, 7 insertions(+), 18 deletions(-) diff --git a/drivers/nfc/nfcmrvl/i2c.c b/drivers/nfc/nfcmrvl/i2c.c index e74342b0b7289..164e2ab859fd9 100644 --- a/drivers/nfc/nfcmrvl/i2c.c +++ b/drivers/nfc/nfcmrvl/i2c.c @@ -168,7 +168,7 @@ static int nfcmrvl_i2c_parse_dt(struct device_node *node, return ret; } - if (of_find_property(node, "i2c-int-falling", NULL)) + if (of_property_read_bool(node, "i2c-int-falling")) pdata->irq_polarity = IRQF_TRIGGER_FALLING; else pdata->irq_polarity = IRQF_TRIGGER_RISING; diff --git a/drivers/nfc/nfcmrvl/main.c b/drivers/nfc/nfcmrvl/main.c index 1a5284de4341b..141bc4b66dcb2 100644 --- a/drivers/nfc/nfcmrvl/main.c +++ b/drivers/nfc/nfcmrvl/main.c @@ -261,11 +261,7 @@ int nfcmrvl_parse_dt(struct device_node *node, return reset_n_io; } pdata->reset_n_io = reset_n_io; - - if (of_find_property(node, "hci-muxed", NULL)) - pdata->hci_muxed = 1; - else - pdata->hci_muxed = 0; + pdata->hci_muxed = of_property_read_bool(node, "hci-muxed"); return 0; } diff --git a/drivers/nfc/nfcmrvl/nfcmrvl.h b/drivers/nfc/nfcmrvl/nfcmrvl.h index 0f22b3233f733..f61a99e553db0 100644 --- a/drivers/nfc/nfcmrvl/nfcmrvl.h +++ b/drivers/nfc/nfcmrvl/nfcmrvl.h @@ -56,16 +56,16 @@ struct nfcmrvl_platform_data { /* GPIO that is wired to RESET_N signal */ int reset_n_io; /* Tell if transport is muxed in HCI one */ - unsigned int hci_muxed; + bool hci_muxed; /* * UART specific */ /* Tell if UART needs flow control at init */ - unsigned int flow_control; + bool flow_control; /* Tell if firmware supports break control for power management */ - unsigned int break_control; + bool break_control; /* diff --git 
a/drivers/nfc/nfcmrvl/uart.c b/drivers/nfc/nfcmrvl/uart.c index 9c92cbdc42f08..956ae92f75734 100644 --- a/drivers/nfc/nfcmrvl/uart.c +++ b/drivers/nfc/nfcmrvl/uart.c @@ -76,15 +76,8 @@ static int nfcmrvl_uart_parse_dt(struct device_node *node, return ret; } - if (of_find_property(matched_node, "flow-control", NULL)) - pdata->flow_control = 1; - else - pdata->flow_control = 0; - - if (of_find_property(matched_node, "break-control", NULL)) - pdata->break_control = 1; - else - pdata->break_control = 0; + pdata->flow_control = of_property_read_bool(matched_node, "flow-control"); + pdata->break_control = of_property_read_bool(matched_node, "break-control"); of_node_put(matched_node); -- GitLab From 77473d1a962f3d4f7ba48324502b6d27b8ef2591 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Thu, 16 Mar 2023 00:40:24 -0500 Subject: [PATCH 0509/2078] bpf: Free struct bpf_cpumask in call_rcu handler The struct bpf_cpumask type uses the bpf_mem_cache_{alloc,free}() APIs to allocate and free its cpumasks. The bpf_mem allocator may currently immediately reuse some memory when its freed, without waiting for an RCU read cycle to elapse. We want to be able to treat struct bpf_cpumask objects as completely RCU safe. This is necessary for two reasons: 1. bpf_cpumask_kptr_get() currently does an RCU-protected refcnt_inc_not_zero(). This of course assumes that the underlying memory is not reused, and is therefore unsafe in its current form. 2. We want to be able to get rid of bpf_cpumask_kptr_get() entirely, and intead use the superior kptr RCU semantics now afforded by the verifier. This patch fixes (1), and enables (2), by making struct bpf_cpumask RCU safe. A subsequent patch will update the verifier to allow struct bpf_cpumask * pointers to be passed to KF_RCU kfuncs, and then a latter patch will remove bpf_cpumask_kptr_get(). Fixes: 516f4d3397c9 ("bpf: Enable cpumasks to be queried and used as kptrs") Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20230316054028.88924-2-void@manifault.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/cpumask.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/cpumask.c b/kernel/bpf/cpumask.c index b6587ec40f1b8..98eea62b6b7bf 100644 --- a/kernel/bpf/cpumask.c +++ b/kernel/bpf/cpumask.c @@ -9,6 +9,7 @@ /** * struct bpf_cpumask - refcounted BPF cpumask wrapper structure * @cpumask: The actual cpumask embedded in the struct. + * @rcu: The RCU head used to free the cpumask with RCU safety. * @usage: Object reference counter. When the refcount goes to 0, the * memory is released back to the BPF allocator, which provides * RCU safety. @@ -24,6 +25,7 @@ */ struct bpf_cpumask { cpumask_t cpumask; + struct rcu_head rcu; refcount_t usage; }; @@ -108,6 +110,16 @@ __bpf_kfunc struct bpf_cpumask *bpf_cpumask_kptr_get(struct bpf_cpumask **cpumas return cpumask; } +static void cpumask_free_cb(struct rcu_head *head) +{ + struct bpf_cpumask *cpumask; + + cpumask = container_of(head, struct bpf_cpumask, rcu); + migrate_disable(); + bpf_mem_cache_free(&bpf_cpumask_ma, cpumask); + migrate_enable(); +} + /** * bpf_cpumask_release() - Release a previously acquired BPF cpumask. * @cpumask: The cpumask being released. 
@@ -121,11 +133,8 @@ __bpf_kfunc void bpf_cpumask_release(struct bpf_cpumask *cpumask) if (!cpumask) return; - if (refcount_dec_and_test(&cpumask->usage)) { - migrate_disable(); - bpf_mem_cache_free(&bpf_cpumask_ma, cpumask); - migrate_enable(); - } + if (refcount_dec_and_test(&cpumask->usage)) + call_rcu(&cpumask->rcu, cpumask_free_cb); } /** -- GitLab From 63d2d83d21a6e2c6f019da5b2d5cdabe6d1cb951 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Thu, 16 Mar 2023 00:40:25 -0500 Subject: [PATCH 0510/2078] bpf: Mark struct bpf_cpumask as rcu protected struct bpf_cpumask is a BPF-wrapper around the struct cpumask type which can be instantiated by a BPF program, and then queried as a cpumask in similar fashion to normal kernel code. The previous patch in this series makes the type fully RCU safe, so the type can be included in the rcu_protected_type BTF ID list. A subsequent patch will remove bpf_cpumask_kptr_get(), as it's no longer useful now that we can just treat the type as RCU safe by default and do our own if check. Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20230316054028.88924-3-void@manifault.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 60793f793ca6e..15b5c5c729f99 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4599,6 +4599,7 @@ static bool in_rcu_cs(struct bpf_verifier_env *env) BTF_SET_START(rcu_protected_types) BTF_ID(struct, prog_test_ref_kfunc) BTF_ID(struct, cgroup) +BTF_ID(struct, bpf_cpumask) BTF_SET_END(rcu_protected_types) static bool rcu_protected_object(const struct btf *btf, u32 btf_id) -- GitLab From a5a197df58c44ce32a86b57e970da4bd7b71b399 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Thu, 16 Mar 2023 00:40:26 -0500 Subject: [PATCH 0511/2078] bpf/selftests: Test using global cpumask kptr with RCU Now that struct bpf_cpumask * is considered an RCU-safe type according to the verifier, we should add tests that validate its common usages. This patch adds those tests to the cpumask test suite. A subsequent changes will remove bpf_cpumask_kptr_get(), and will adjust the selftest and BPF documentation accordingly. Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20230316054028.88924-4-void@manifault.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/cpumask.c | 1 + .../selftests/bpf/progs/cpumask_common.h | 6 ++ .../selftests/bpf/progs/cpumask_failure.c | 62 +++++++++++++++++++ .../selftests/bpf/progs/cpumask_success.c | 33 ++++++++++ 4 files changed, 102 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask.c b/tools/testing/selftests/bpf/prog_tests/cpumask.c index 5fbe457c4ebe3..6c0fe23498c7c 100644 --- a/tools/testing/selftests/bpf/prog_tests/cpumask.c +++ b/tools/testing/selftests/bpf/prog_tests/cpumask.c @@ -17,6 +17,7 @@ static const char * const cpumask_success_testcases[] = { "test_insert_leave", "test_insert_remove_release", "test_insert_kptr_get_release", + "test_global_mask_rcu", }; static void verify_success(const char *prog_name) diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h index 65e5496ca1b22..7623782fbd625 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_common.h +++ b/tools/testing/selftests/bpf/progs/cpumask_common.h @@ -9,6 +9,9 @@ int err; +#define private(name) SEC(".bss." 
#name) __hidden __attribute__((aligned(8))) +private(MASK) static struct bpf_cpumask __kptr * global_mask; + struct __cpumask_map_value { struct bpf_cpumask __kptr * cpumask; }; @@ -51,6 +54,9 @@ void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) __ksym u32 bpf_cpumask_any(const struct cpumask *src) __ksym; u32 bpf_cpumask_any_and(const struct cpumask *src1, const struct cpumask *src2) __ksym; +void bpf_rcu_read_lock(void) __ksym; +void bpf_rcu_read_unlock(void) __ksym; + static inline const struct cpumask *cast(struct bpf_cpumask *cpumask) { return (const struct cpumask *)cpumask; diff --git a/tools/testing/selftests/bpf/progs/cpumask_failure.c b/tools/testing/selftests/bpf/progs/cpumask_failure.c index cfe83f0ef9e25..9f726d55f7477 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_failure.c +++ b/tools/testing/selftests/bpf/progs/cpumask_failure.c @@ -127,3 +127,65 @@ int BPF_PROG(test_cpumask_null, struct task_struct *task, u64 clone_flags) return 0; } + +SEC("tp_btf/task_newtask") +__failure __msg("R2 must be a rcu pointer") +int BPF_PROG(test_global_mask_out_of_rcu, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *local, *prev; + + local = create_cpumask(); + if (!local) + return 0; + + prev = bpf_kptr_xchg(&global_mask, local); + if (prev) { + bpf_cpumask_release(prev); + err = 3; + return 0; + } + + bpf_rcu_read_lock(); + local = global_mask; + if (!local) { + err = 4; + bpf_rcu_read_unlock(); + return 0; + } + + bpf_rcu_read_unlock(); + + /* RCU region is exited before calling KF_RCU kfunc. */ + + bpf_cpumask_test_cpu(0, (const struct cpumask *)local); + + return 0; +} + +SEC("tp_btf/task_newtask") +__failure __msg("NULL pointer passed to trusted arg1") +int BPF_PROG(test_global_mask_no_null_check, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *local, *prev; + + local = create_cpumask(); + if (!local) + return 0; + + prev = bpf_kptr_xchg(&global_mask, local); + if (prev) { + bpf_cpumask_release(prev); + err = 3; + return 0; + } + + bpf_rcu_read_lock(); + local = global_mask; + + /* No NULL check is performed on global cpumask kptr. */ + bpf_cpumask_test_cpu(0, (const struct cpumask *)local); + + bpf_rcu_read_unlock(); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/cpumask_success.c b/tools/testing/selftests/bpf/progs/cpumask_success.c index 97ed08c4ff03c..fe928ff72a064 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_success.c +++ b/tools/testing/selftests/bpf/progs/cpumask_success.c @@ -423,3 +423,36 @@ int BPF_PROG(test_insert_kptr_get_release, struct task_struct *task, u64 clone_f return 0; } + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_global_mask_rcu, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *local, *prev; + + if (!is_test_task()) + return 0; + + local = create_cpumask(); + if (!local) + return 0; + + prev = bpf_kptr_xchg(&global_mask, local); + if (prev) { + bpf_cpumask_release(prev); + err = 3; + return 0; + } + + bpf_rcu_read_lock(); + local = global_mask; + if (!local) { + err = 4; + bpf_rcu_read_unlock(); + return 0; + } + + bpf_cpumask_test_cpu(0, (const struct cpumask *)local); + bpf_rcu_read_unlock(); + + return 0; +} -- GitLab From 1b403ce77dfbf234723a91bc411dfb03a0499d6e Mon Sep 17 00:00:00 2001 From: David Vernet Date: Thu, 16 Mar 2023 00:40:27 -0500 Subject: [PATCH 0512/2078] bpf: Remove bpf_cpumask_kptr_get() kfunc Now that struct bpf_cpumask is RCU safe, there's no need for this kfunc. 
Rather than doing the following: private(MASK) static struct bpf_cpumask __kptr *global; int BPF_PROG(prog, s32 cpu, ...) { struct bpf_cpumask *cpumask; bpf_rcu_read_lock(); cpumask = bpf_cpumask_kptr_get(&global); if (!cpumask) { bpf_rcu_read_unlock(); return -1; } bpf_cpumask_setall(cpumask); ... bpf_cpumask_release(cpumask); bpf_rcu_read_unlock(); } Programs can instead simply do (assume same global cpumask): int BPF_PROG(prog, ...) { struct bpf_cpumask *cpumask; bpf_rcu_read_lock(); cpumask = global; if (!cpumask) { bpf_rcu_read_unlock(); return -1; } bpf_cpumask_setall(cpumask); ... bpf_rcu_read_unlock(); } In other words, no extra atomic acquire / release, and less boilerplate code. This patch removes both the kfunc, as well as its selftests and documentation. Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20230316054028.88924-5-void@manifault.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/cpumask.c | 29 ------------------ .../selftests/bpf/prog_tests/cpumask.c | 1 - .../selftests/bpf/progs/cpumask_common.h | 1 - .../selftests/bpf/progs/cpumask_failure.c | 24 --------------- .../selftests/bpf/progs/cpumask_success.c | 30 ------------------- 5 files changed, 85 deletions(-) diff --git a/kernel/bpf/cpumask.c b/kernel/bpf/cpumask.c index 98eea62b6b7bf..db9da2194c1a7 100644 --- a/kernel/bpf/cpumask.c +++ b/kernel/bpf/cpumask.c @@ -82,34 +82,6 @@ __bpf_kfunc struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask) return cpumask; } -/** - * bpf_cpumask_kptr_get() - Attempt to acquire a reference to a BPF cpumask - * stored in a map. - * @cpumaskp: A pointer to a BPF cpumask map value. - * - * Attempts to acquire a reference to a BPF cpumask stored in a map value. The - * cpumask returned by this function must either be embedded in a map as a - * kptr, or freed with bpf_cpumask_release(). This function may return NULL if - * no BPF cpumask was found in the specified map value. - */ -__bpf_kfunc struct bpf_cpumask *bpf_cpumask_kptr_get(struct bpf_cpumask **cpumaskp) -{ - struct bpf_cpumask *cpumask; - - /* The BPF memory allocator frees memory backing its caches in an RCU - * callback. Thus, we can safely use RCU to ensure that the cpumask is - * safe to read. 
- */ - rcu_read_lock(); - - cpumask = READ_ONCE(*cpumaskp); - if (cpumask && !refcount_inc_not_zero(&cpumask->usage)) - cpumask = NULL; - - rcu_read_unlock(); - return cpumask; -} - static void cpumask_free_cb(struct rcu_head *head) { struct bpf_cpumask *cpumask; @@ -435,7 +407,6 @@ BTF_SET8_START(cpumask_kfunc_btf_ids) BTF_ID_FLAGS(func, bpf_cpumask_create, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_cpumask_release, KF_RELEASE | KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_cpumask_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_cpumask_first, KF_RCU) BTF_ID_FLAGS(func, bpf_cpumask_first_zero, KF_RCU) BTF_ID_FLAGS(func, bpf_cpumask_set_cpu, KF_RCU) diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask.c b/tools/testing/selftests/bpf/prog_tests/cpumask.c index 6c0fe23498c7c..cdf4acc18e4c5 100644 --- a/tools/testing/selftests/bpf/prog_tests/cpumask.c +++ b/tools/testing/selftests/bpf/prog_tests/cpumask.c @@ -16,7 +16,6 @@ static const char * const cpumask_success_testcases[] = { "test_copy_any_anyand", "test_insert_leave", "test_insert_remove_release", - "test_insert_kptr_get_release", "test_global_mask_rcu", }; diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h index 7623782fbd625..0c5b785a93e45 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_common.h +++ b/tools/testing/selftests/bpf/progs/cpumask_common.h @@ -26,7 +26,6 @@ struct array_map { struct bpf_cpumask *bpf_cpumask_create(void) __ksym; void bpf_cpumask_release(struct bpf_cpumask *cpumask) __ksym; struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask) __ksym; -struct bpf_cpumask *bpf_cpumask_kptr_get(struct bpf_cpumask **cpumask) __ksym; u32 bpf_cpumask_first(const struct cpumask *cpumask) __ksym; u32 bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym; void bpf_cpumask_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym; diff --git a/tools/testing/selftests/bpf/progs/cpumask_failure.c b/tools/testing/selftests/bpf/progs/cpumask_failure.c index 9f726d55f7477..db4f94e72b615 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_failure.c +++ b/tools/testing/selftests/bpf/progs/cpumask_failure.c @@ -94,30 +94,6 @@ int BPF_PROG(test_insert_remove_no_release, struct task_struct *task, u64 clone_ return 0; } -SEC("tp_btf/task_newtask") -__failure __msg("Unreleased reference") -int BPF_PROG(test_kptr_get_no_release, struct task_struct *task, u64 clone_flags) -{ - struct bpf_cpumask *cpumask; - struct __cpumask_map_value *v; - - cpumask = create_cpumask(); - if (!cpumask) - return 0; - - if (cpumask_map_insert(cpumask)) - return 0; - - v = cpumask_map_value_lookup(); - if (!v) - return 0; - - cpumask = bpf_cpumask_kptr_get(&v->cpumask); - - /* cpumask is never released. 
*/ - return 0; -} - SEC("tp_btf/task_newtask") __failure __msg("NULL pointer passed to trusted arg0") int BPF_PROG(test_cpumask_null, struct task_struct *task, u64 clone_flags) diff --git a/tools/testing/selftests/bpf/progs/cpumask_success.c b/tools/testing/selftests/bpf/progs/cpumask_success.c index fe928ff72a064..2fcdd7f68ac7a 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_success.c +++ b/tools/testing/selftests/bpf/progs/cpumask_success.c @@ -394,36 +394,6 @@ int BPF_PROG(test_insert_remove_release, struct task_struct *task, u64 clone_fla return 0; } -SEC("tp_btf/task_newtask") -int BPF_PROG(test_insert_kptr_get_release, struct task_struct *task, u64 clone_flags) -{ - struct bpf_cpumask *cpumask; - struct __cpumask_map_value *v; - - cpumask = create_cpumask(); - if (!cpumask) - return 0; - - if (cpumask_map_insert(cpumask)) { - err = 3; - return 0; - } - - v = cpumask_map_value_lookup(); - if (!v) { - err = 4; - return 0; - } - - cpumask = bpf_cpumask_kptr_get(&v->cpumask); - if (cpumask) - bpf_cpumask_release(cpumask); - else - err = 5; - - return 0; -} - SEC("tp_btf/task_newtask") int BPF_PROG(test_global_mask_rcu, struct task_struct *task, u64 clone_flags) { -- GitLab From fec2c6d14fd5001e7d24a2ae44f0e9aea82a6149 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Thu, 16 Mar 2023 00:40:28 -0500 Subject: [PATCH 0513/2078] bpf,docs: Remove bpf_cpumask_kptr_get() from documentation Now that the kfunc no longer exists, we can remove it and instead describe how RCU can be used to get a struct bpf_cpumask from a map value. This patch updates the BPF documentation accordingly. Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20230316054028.88924-6-void@manifault.com Signed-off-by: Alexei Starovoitov --- Documentation/bpf/cpumasks.rst | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/Documentation/bpf/cpumasks.rst b/Documentation/bpf/cpumasks.rst index 75344cd230e55..41efd8874eebc 100644 --- a/Documentation/bpf/cpumasks.rst +++ b/Documentation/bpf/cpumasks.rst @@ -117,12 +117,7 @@ For example: As mentioned and illustrated above, these ``struct bpf_cpumask *`` objects can also be stored in a map and used as kptrs. If a ``struct bpf_cpumask *`` is in a map, the reference can be removed from the map with bpf_kptr_xchg(), or -opportunistically acquired with bpf_cpumask_kptr_get(): - -.. kernel-doc:: kernel/bpf/cpumask.c - :identifiers: bpf_cpumask_kptr_get - -Here is an example of a ``struct bpf_cpumask *`` being retrieved from a map: +opportunistically acquired using RCU: .. code-block:: c @@ -144,7 +139,7 @@ Here is an example of a ``struct bpf_cpumask *`` being retrieved from a map: /** * A simple example tracepoint program showing how a * struct bpf_cpumask * kptr that is stored in a map can - * be acquired using the bpf_cpumask_kptr_get() kfunc. + * be passed to kfuncs using RCU protection. */ SEC("tp_btf/cgroup_mkdir") int BPF_PROG(cgrp_ancestor_example, struct cgroup *cgrp, const char *path) @@ -158,26 +153,21 @@ Here is an example of a ``struct bpf_cpumask *`` being retrieved from a map: if (!v) return -ENOENT; + bpf_rcu_read_lock(); /* Acquire a reference to the bpf_cpumask * kptr that's already stored in the map. */ - kptr = bpf_cpumask_kptr_get(&v->cpumask); - if (!kptr) + kptr = v->cpumask; + if (!kptr) { /* If no bpf_cpumask was present in the map, it's because * we're racing with another CPU that removed it with * bpf_kptr_xchg() between the bpf_map_lookup_elem() - * above, and our call to bpf_cpumask_kptr_get(). 
- * bpf_cpumask_kptr_get() internally safely handles this - * race, and will return NULL if the cpumask is no longer - * present in the map by the time we invoke the kfunc. + * above, and our load of the pointer from the map. */ + bpf_rcu_read_unlock(); return -EBUSY; + } - /* Free the reference we just took above. Note that the - * original struct bpf_cpumask * kptr is still in the map. It will - * be freed either at a later time if another context deletes - * it from the map, or automatically by the BPF subsystem if - * it's still present when the map is destroyed. - */ - bpf_cpumask_release(kptr); + bpf_cpumask_setall(kptr); + bpf_rcu_read_unlock(); return 0; } -- GitLab From 082cdc69a4651dd2a77539d69416a359ed1214f5 Mon Sep 17 00:00:00 2001 From: Luis Gerhorst Date: Wed, 15 Mar 2023 17:54:00 +0100 Subject: [PATCH 0514/2078] bpf: Remove misleading spec_v1 check on var-offset stack read For every BPF_ADD/SUB involving a pointer, adjust_ptr_min_max_vals() ensures that the resulting pointer has a constant offset if bypass_spec_v1 is false. This is ensured by calling sanitize_check_bounds() which in turn calls check_stack_access_for_ptr_arithmetic(). There, -EACCESS is returned if the register's offset is not constant, thereby rejecting the program. In summary, an unprivileged user must never be able to create stack pointers with a variable offset. That is also the case, because a respective check in check_stack_write() is missing. If they were able to create a variable-offset pointer, users could still use it in a stack-write operation to trigger unsafe speculative behavior [1]. Because unprivileged users must already be prevented from creating variable-offset stack pointers, viable options are to either remove this check (replacing it with a clarifying comment), or to turn it into a "verifier BUG"-message, also adding a similar check in check_stack_write() (for consistency, as a second-level defense). This patch implements the first option to reduce verifier bloat. This check was introduced by commit 01f810ace9ed ("bpf: Allow variable-offset stack access") which correctly notes that "variable-offset reads and writes are disallowed (they were already disallowed for the indirect access case) because the speculative execution checking code doesn't support them". However, it does not further discuss why the check in check_stack_read() is necessary. The code which made this check obsolete was also introduced in this commit. I have compiled ~650 programs from the Linux selftests, Linux samples, Cilium, and libbpf/examples projects and confirmed that none of these trigger the check in check_stack_read() [2]. Instead, all of these programs are, as expected, already rejected when constructing the variable-offset pointers. Note that the check in check_stack_access_for_ptr_arithmetic() also prints "off=%d" while the code removed by this patch does not (the error removed does not appear in the "verification_error" values). For reproducibility, the repository linked includes the raw data and scripts used to create the plot. 
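For illustration only (not part of the patch): a minimal BPF-C sketch of the program shape discussed above, i.e. a stack pointer whose offset depends on a runtime value. The program type, section name, and the use of skb->len as the variable index are assumptions made for the example; standard libbpf headers are assumed. For unprivileged loaders the verifier already rejects this at the pointer-arithmetic step via check_stack_access_for_ptr_arithmetic(), before any read through the pointer is evaluated, which is why the check removed by this patch never fires.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

char LICENSE[] SEC("license") = "GPL";

SEC("socket")
int var_off_stack(struct __sk_buff *skb)
{
	char buf[64] = {};
	__u64 idx = skb->len & 63;	/* runtime-dependent, bounded index */
	char *p;

	/* Variable-offset stack pointer: when bypass_spec_v1 is false, this
	 * addition is already rejected by
	 * check_stack_access_for_ptr_arithmetic(), so the read below is never
	 * reached during verification.
	 */
	p = &buf[idx];

	return *p;
}
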
[1] https://arxiv.org/pdf/1807.03757.pdf [2] https://gitlab.cs.fau.de/un65esoq/bpf-spectre/-/raw/53dc19fcf459c186613b1156a81504b39c8d49db/data/plots/23-02-26_23-56_bpftool/bpftool/0004-errors.pdf?inline=false Fixes: 01f810ace9ed ("bpf: Allow variable-offset stack access") Signed-off-by: Luis Gerhorst Signed-off-by: Daniel Borkmann Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230315165358.23701-1-gerhorst@cs.fau.de --- kernel/bpf/verifier.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 15b5c5c729f99..d62b7127ff2a2 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4303,17 +4303,13 @@ static int check_stack_read(struct bpf_verifier_env *env, } /* Variable offset is prohibited for unprivileged mode for simplicity * since it requires corresponding support in Spectre masking for stack - * ALU. See also retrieve_ptr_limit(). + * ALU. See also retrieve_ptr_limit(). The check in + * check_stack_access_for_ptr_arithmetic() called by + * adjust_ptr_min_max_vals() prevents users from creating stack pointers + * with variable offsets, therefore no check is required here. Further, + * just checking it here would be insufficient as speculative stack + * writes could still lead to unsafe speculative behaviour. */ - if (!env->bypass_spec_v1 && var_off) { - char tn_buf[48]; - - tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n", - ptr_regno, tn_buf); - return -EACCES; - } - if (!var_off) { off += reg->var_off.value; err = check_stack_read_fixed_off(env, state, off, size, -- GitLab From d565263b7d83371e64ef315bdc3428909808b46f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 14 Mar 2023 20:18:24 +0200 Subject: [PATCH 0515/2078] net: dsa: hellcreek: Get rid of custom led_init_default_state_get() LED core provides a helper to parse default state from firmware node. Use it instead of custom implementation. Signed-off-by: Andy Shevchenko Reviewed-by: Kurt Kanzenbach Reviewed-by: Michal Swiatkowski Link: https://lore.kernel.org/r/20230314181824.56881-1-andriy.shevchenko@linux.intel.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/hirschmann/hellcreek_ptp.c | 45 ++++++++++++---------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/drivers/net/dsa/hirschmann/hellcreek_ptp.c b/drivers/net/dsa/hirschmann/hellcreek_ptp.c index b28baab6d56a1..3e44ccb7db84c 100644 --- a/drivers/net/dsa/hirschmann/hellcreek_ptp.c +++ b/drivers/net/dsa/hirschmann/hellcreek_ptp.c @@ -297,7 +297,8 @@ static enum led_brightness hellcreek_led_is_gm_get(struct led_classdev *ldev) static int hellcreek_led_setup(struct hellcreek *hellcreek) { struct device_node *leds, *led = NULL; - const char *label, *state; + enum led_default_state state; + const char *label; int ret = -EINVAL; of_node_get(hellcreek->dev->of_node); @@ -318,16 +319,17 @@ static int hellcreek_led_setup(struct hellcreek *hellcreek) ret = of_property_read_string(led, "label", &label); hellcreek->led_sync_good.name = ret ? 
"sync_good" : label; - ret = of_property_read_string(led, "default-state", &state); - if (!ret) { - if (!strcmp(state, "on")) - hellcreek->led_sync_good.brightness = 1; - else if (!strcmp(state, "off")) - hellcreek->led_sync_good.brightness = 0; - else if (!strcmp(state, "keep")) - hellcreek->led_sync_good.brightness = - hellcreek_get_brightness(hellcreek, - STATUS_OUT_SYNC_GOOD); + state = led_init_default_state_get(of_fwnode_handle(led)); + switch (state) { + case LEDS_DEFSTATE_ON: + hellcreek->led_sync_good.brightness = 1; + break; + case LEDS_DEFSTATE_KEEP: + hellcreek->led_sync_good.brightness = + hellcreek_get_brightness(hellcreek, STATUS_OUT_SYNC_GOOD); + break; + default: + hellcreek->led_sync_good.brightness = 0; } hellcreek->led_sync_good.max_brightness = 1; @@ -344,16 +346,17 @@ static int hellcreek_led_setup(struct hellcreek *hellcreek) ret = of_property_read_string(led, "label", &label); hellcreek->led_is_gm.name = ret ? "is_gm" : label; - ret = of_property_read_string(led, "default-state", &state); - if (!ret) { - if (!strcmp(state, "on")) - hellcreek->led_is_gm.brightness = 1; - else if (!strcmp(state, "off")) - hellcreek->led_is_gm.brightness = 0; - else if (!strcmp(state, "keep")) - hellcreek->led_is_gm.brightness = - hellcreek_get_brightness(hellcreek, - STATUS_OUT_IS_GM); + state = led_init_default_state_get(of_fwnode_handle(led)); + switch (state) { + case LEDS_DEFSTATE_ON: + hellcreek->led_is_gm.brightness = 1; + break; + case LEDS_DEFSTATE_KEEP: + hellcreek->led_is_gm.brightness = + hellcreek_get_brightness(hellcreek, STATUS_OUT_IS_GM); + break; + default: + hellcreek->led_is_gm.brightness = 0; } hellcreek->led_is_gm.max_brightness = 1; -- GitLab From abc783a7b0ff34d20a799d2fbf0ed0b2b06b72ed Mon Sep 17 00:00:00 2001 From: Durai Manickam KR Date: Wed, 15 Mar 2023 15:20:52 +0530 Subject: [PATCH 0516/2078] net: macb: Add PTP support to GEM for sama7g5 Add PTP capability to the Gigabit Ethernet MAC. Signed-off-by: Durai Manickam KR Reviewed-by: Claudiu Beznea Reviewed-by: Michal Swiatkowski Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 6e141a8bbf43c..27fc6c903d250 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -4844,7 +4844,7 @@ static const struct macb_config mpfs_config = { static const struct macb_config sama7g5_gem_config = { .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_CLK_HW_CHG | - MACB_CAPS_MIIONRGMII, + MACB_CAPS_MIIONRGMII | MACB_CAPS_GEM_HAS_PTP, .dma_burst_length = 16, .clk_init = macb_clk_init, .init = macb_init, -- GitLab From 9bae0dd05e61009bf45f4a6a8fbde87ed9567166 Mon Sep 17 00:00:00 2001 From: Durai Manickam KR Date: Wed, 15 Mar 2023 15:20:53 +0530 Subject: [PATCH 0517/2078] net: macb: Add PTP support to EMAC for sama7g5 Add PTP capability to the Ethernet MAC. 
Signed-off-by: Durai Manickam KR Reviewed-by: Claudiu Beznea Reviewed-by: Michal Swiatkowski Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 27fc6c903d250..1dbee16fe90ac 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -4853,7 +4853,8 @@ static const struct macb_config sama7g5_gem_config = { static const struct macb_config sama7g5_emac_config = { .caps = MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII | - MACB_CAPS_USRIO_HAS_CLKEN | MACB_CAPS_MIIONRGMII, + MACB_CAPS_USRIO_HAS_CLKEN | MACB_CAPS_MIIONRGMII | + MACB_CAPS_GEM_HAS_PTP, .dma_burst_length = 16, .clk_init = macb_clk_init, .init = macb_init, -- GitLab From e5995bc7e2ba1a0d444f806016d2e4ea91c032d0 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 16 Mar 2023 18:50:50 +0100 Subject: [PATCH 0518/2078] bpf, test_run: fix crashes due to XDP frame overwriting/corruption MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit syzbot and Ilya faced the splats when %XDP_PASS happens for bpf_test_run after skb PP recycling was enabled for {__,}xdp_build_skb_from_frame(): BUG: kernel NULL pointer dereference, address: 0000000000000d28 RIP: 0010:memset_erms+0xd/0x20 arch/x86/lib/memset_64.S:66 [...] Call Trace: __finalize_skb_around net/core/skbuff.c:321 [inline] __build_skb_around+0x232/0x3a0 net/core/skbuff.c:379 build_skb_around+0x32/0x290 net/core/skbuff.c:444 __xdp_build_skb_from_frame+0x121/0x760 net/core/xdp.c:622 xdp_recv_frames net/bpf/test_run.c:248 [inline] xdp_test_run_batch net/bpf/test_run.c:334 [inline] bpf_test_run_xdp_live+0x1289/0x1930 net/bpf/test_run.c:362 bpf_prog_test_run_xdp+0xa05/0x14e0 net/bpf/test_run.c:1418 [...] This happens due to that it calls xdp_scrub_frame(), which nullifies xdpf->data. bpf_test_run code doesn't reinit the frame when the XDP program doesn't adjust head or tail. Previously, %XDP_PASS meant the page will be released from the pool and returned to the MM layer, but now it does return to the Pool with the nullified xdpf->data, which doesn't get reinitialized then. So, in addition to checking whether the head and/or tail have been adjusted, check also for a potential XDP frame corruption. xdpf->data is 100% affected and also xdpf->flags is the field closest to the metadata / frame start. Checking for these two should be enough for non-extreme cases. 
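For context only (not part of the fix): the corrupted-frame path above is reached through BPF_PROG_RUN's live-frames mode. A minimal userspace sketch of driving that path is shown below; it assumes libbpf with bpf_prog_test_run_opts() and an already-loaded XDP program fd, and the helper name, packet contents, and repeat count are illustrative assumptions only.

#include <string.h>
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <bpf/bpf.h>

/* run_xdp_live() is a hypothetical helper, not a kernel or libbpf API. */
static int run_xdp_live(int xdp_prog_fd)
{
	char pkt[ETH_ZLEN] = {};	/* dummy Ethernet frame, contents irrelevant here */
	LIBBPF_OPTS(bpf_test_run_opts, opts,
		.data_in = pkt,
		.data_size_in = sizeof(pkt),
		.repeat = 64,
		.flags = BPF_F_TEST_XDP_LIVE_FRAMES,
	);

	/* In live mode, each XDP_PASS frame is built into an skb; with
	 * page_pool recycling its page can come back for a later iteration,
	 * which is where the scrubbed (data == NULL) xdp_frame was observed
	 * before this fix.
	 */
	return bpf_prog_test_run_opts(xdp_prog_fd, &opts);
}
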
Fixes: 9c94bbf9a87b ("xdp: recycle Page Pool backed skbs built from XDP frames") Reported-by: syzbot+e1d1b65f7c32f2a86a9f@syzkaller.appspotmail.com Link: https://lore.kernel.org/bpf/000000000000f1985705f6ef2243@google.com Reported-by: Ilya Leoshkevich Link: https://lore.kernel.org/bpf/e07dd94022ad5731705891b9487cc9ed66328b94.camel@linux.ibm.com Signed-off-by: Alexander Lobakin Acked-by: Toke Høiland-Jørgensen Tested-by: Ilya Leoshkevich Link: https://lore.kernel.org/r/20230316175051.922550-2-aleksander.lobakin@intel.com Signed-off-by: Alexei Starovoitov --- net/bpf/test_run.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 71226f68270d9..8d6b31209bd62 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -208,6 +208,16 @@ static void xdp_test_run_teardown(struct xdp_test_data *xdp) kfree(xdp->skbs); } +static bool frame_was_changed(const struct xdp_page_head *head) +{ + /* xdp_scrub_frame() zeroes the data pointer, flags is the last field, + * i.e. has the highest chances to be overwritten. If those two are + * untouched, it's most likely safe to skip the context reset. + */ + return head->frm.data != head->orig_ctx.data || + head->frm.flags != head->orig_ctx.flags; +} + static bool ctx_was_changed(struct xdp_page_head *head) { return head->orig_ctx.data != head->ctx.data || @@ -217,7 +227,7 @@ static bool ctx_was_changed(struct xdp_page_head *head) static void reset_ctx(struct xdp_page_head *head) { - if (likely(!ctx_was_changed(head))) + if (likely(!frame_was_changed(head) && !ctx_was_changed(head))) return; head->ctx.data = head->orig_ctx.data; -- GitLab From 5640b6d894342d153b719644681b0345fd28ee96 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 16 Mar 2023 18:50:51 +0100 Subject: [PATCH 0519/2078] selftests/bpf: fix "metadata marker" getting overwritten by the netstack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Alexei noticed xdp_do_redirect test on BPF CI started failing on BE systems after skb PP recycling was enabled: test_xdp_do_redirect:PASS:prog_run 0 nsec test_xdp_do_redirect:PASS:pkt_count_xdp 0 nsec test_xdp_do_redirect:PASS:pkt_count_zero 0 nsec test_xdp_do_redirect:FAIL:pkt_count_tc unexpected pkt_count_tc: actual 220 != expected 9998 test_max_pkt_size:PASS:prog_run_max_size 0 nsec test_max_pkt_size:PASS:prog_run_too_big 0 nsec close_netns:PASS:setns 0 nsec #289 xdp_do_redirect:FAIL Summary: 270/1674 PASSED, 30 SKIPPED, 1 FAILED and it doesn't happen on LE systems. Ilya then hunted it down to: #0 0x0000000000aaeee6 in neigh_hh_output (hh=0x83258df0, skb=0x88142200) at linux/include/net/neighbour.h:503 #1 0x0000000000ab2cda in neigh_output (skip_cache=false, skb=0x88142200, n=) at linux/include/net/neighbour.h:544 #2 ip6_finish_output2 (net=net@entry=0x88edba00, sk=sk@entry=0x0, skb=skb@entry=0x88142200) at linux/net/ipv6/ip6_output.c:134 #3 0x0000000000ab4cbc in __ip6_finish_output (skb=0x88142200, sk=0x0, net=0x88edba00) at linux/net/ipv6/ip6_output.c:195 #4 ip6_finish_output (net=0x88edba00, sk=0x0, skb=0x88142200) at linux/net/ipv6/ip6_output.c:206 xdp_do_redirect test places a u32 marker (0x42) right before the Ethernet header to check it then in the XDP program and return %XDP_ABORTED if it's not there. Neigh xmit code likes to round up hard header length to speed up copying the header, so it overwrites two bytes in front of the Eth header. 
On LE systems, 0x42 is one byte at `data - 4`, while on BE it's `data - 1`, what explains why it happens only there. It didn't happen previously due to that %XDP_PASS meant the page will be discarded and replaced by a new one, but now it can be recycled as well, while bpf_test_run code doesn't reinitialize the content of recycled pages. This mark is limited to this particular test and its setup though, so there's no need to predict 1000 different possible cases. Just move it 4 bytes to the left, still keeping it 32 bit to match on more bytes. Fixes: 9c94bbf9a87b ("xdp: recycle Page Pool backed skbs built from XDP frames") Reported-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/CAADnVQ+B_JOU+EpP=DKhbY9yXdN6GiRPnpTTXfEZ9sNkUeb-yQ@mail.gmail.com Reported-by: Ilya Leoshkevich # + debugging Link: https://lore.kernel.org/bpf/8341c1d9f935f410438e79d3bd8a9cc50aefe105.camel@linux.ibm.com Signed-off-by: Alexander Lobakin Acked-by: Toke Høiland-Jørgensen Tested-by: Ilya Leoshkevich Link: https://lore.kernel.org/r/20230316175051.922550-3-aleksander.lobakin@intel.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c | 7 ++++--- tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c index 856cbc29e6a1d..4eaa3dcaebc83 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c @@ -86,12 +86,12 @@ static void test_max_pkt_size(int fd) void test_xdp_do_redirect(void) { int err, xdp_prog_fd, tc_prog_fd, ifindex_src, ifindex_dst; - char data[sizeof(pkt_udp) + sizeof(__u32)]; + char data[sizeof(pkt_udp) + sizeof(__u64)]; struct test_xdp_do_redirect *skel = NULL; struct nstoken *nstoken = NULL; struct bpf_link *link; LIBBPF_OPTS(bpf_xdp_query_opts, query_opts); - struct xdp_md ctx_in = { .data = sizeof(__u32), + struct xdp_md ctx_in = { .data = sizeof(__u64), .data_end = sizeof(data) }; DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = &data, @@ -105,8 +105,9 @@ void test_xdp_do_redirect(void) DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); - memcpy(&data[sizeof(__u32)], &pkt_udp, sizeof(pkt_udp)); + memcpy(&data[sizeof(__u64)], &pkt_udp, sizeof(pkt_udp)); *((__u32 *)data) = 0x42; /* metadata test value */ + *((__u32 *)data + 4) = 0; skel = test_xdp_do_redirect__open(); if (!ASSERT_OK_PTR(skel, "skel")) diff --git a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c index cd2d4e3258b89..5baaafed0d2de 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c @@ -52,7 +52,7 @@ int xdp_redirect(struct xdp_md *xdp) *payload = MARK_IN; - if (bpf_xdp_adjust_meta(xdp, 4)) + if (bpf_xdp_adjust_meta(xdp, sizeof(__u64))) return XDP_ABORTED; if (retcode > XDP_PASS) -- GitLab From c36a77c33db36277ca33b6a2fa5f802ba93694f7 Mon Sep 17 00:00:00 2001 From: Luiz Angelo Daros de Luca Date: Wed, 15 Mar 2023 00:49:22 -0300 Subject: [PATCH 0520/2078] net: dsa: realtek: rtl8365mb: add change_mtu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The rtl8365mb was using a fixed MTU size of 1536, which was probably inspired by the rtl8366rb's initial frame size. 
However, unlike that family, the rtl8365mb family can specify the max frame size in bytes, rather than in fixed steps. DSA calls change_mtu for the CPU port once the max MTU value among the ports changes. As the max frame size is defined globally, the switch is configured only when the call affects the CPU port. The available specifications do not directly define the max supported frame size, but it mentions a 16k limit. This driver will use the 0x3FFF limit as it is used in the vendor API code. However, the switch sets the max frame size to 16368 bytes (0x3FF0) after it resets. change_mtu uses MTU size, or ethernet payload size, while the switch works with frame size. The frame size is calculated considering the ethernet header (14 bytes), a possible 802.1Q tag (4 bytes), the payload size (MTU), and the Ethernet FCS (4 bytes). The CPU tag (8 bytes) is consumed before the switch enforces the limit. During setup, the driver will use the default 1500-byte MTU of DSA to set the maximum frame size. The current sum will be VLAN_ETH_HLEN+1500+ETH_FCS_LEN, which results in 1522 bytes. Although it is lower than the previous initial value of 1536 bytes, the driver will increase the frame size for a larger MTU. However, if something requires more space without increasing the MTU, such as QinQ, we would need to add the extra length to the rtl8365mb_port_change_mtu() formula. MTU was tested up to 2018 (with 802.1Q) as that is as far as mt7620 (where rtl8367s is stacked) can go. The register was manually manipulated byte-by-byte to ensure the MTU to frame size conversion was correct. For frames without 802.1Q tag, the frame size limit will be 4 bytes over the required size. There is a jumbo register, enabled by default at 6k frame size. However, the jumbo settings do not seem to limit nor expand the maximum tested MTU (2018), even when jumbo is disabled. More tests are needed with a device that can handle larger frames. Signed-off-by: Luiz Angelo Daros de Luca Reviewed-by: Alexander Duyck Reviewed-by: Alvin Šipraga Signed-off-by: David S. Miller --- drivers/net/dsa/realtek/rtl8365mb.c | 40 ++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/realtek/rtl8365mb.c b/drivers/net/dsa/realtek/rtl8365mb.c index da31d8b839ac6..41ea3b5a42b14 100644 --- a/drivers/net/dsa/realtek/rtl8365mb.c +++ b/drivers/net/dsa/realtek/rtl8365mb.c @@ -98,6 +98,7 @@ #include #include #include +#include #include "realtek.h" @@ -267,6 +268,7 @@ /* Maximum packet length register */ #define RTL8365MB_CFG0_MAX_LEN_REG 0x088C #define RTL8365MB_CFG0_MAX_LEN_MASK 0x3FFF +#define RTL8365MB_CFG0_MAX_LEN_MAX 0x3FFF /* Port learning limit registers */ #define RTL8365MB_LUT_PORT_LEARN_LIMIT_BASE 0x0A20 @@ -1135,6 +1137,35 @@ static void rtl8365mb_phylink_mac_link_up(struct dsa_switch *ds, int port, } } +static int rtl8365mb_port_change_mtu(struct dsa_switch *ds, int port, + int new_mtu) +{ + struct realtek_priv *priv = ds->priv; + int frame_size; + + /* When a new MTU is set, DSA always sets the CPU port's MTU to the + * largest MTU of the slave ports. Because the switch only has a global + * RX length register, only allowing CPU port here is enough. 
+ */ + if (!dsa_is_cpu_port(ds, port)) + return 0; + + frame_size = new_mtu + VLAN_ETH_HLEN + ETH_FCS_LEN; + + dev_dbg(priv->dev, "changing mtu to %d (frame size: %d)\n", + new_mtu, frame_size); + + return regmap_update_bits(priv->map, RTL8365MB_CFG0_MAX_LEN_REG, + RTL8365MB_CFG0_MAX_LEN_MASK, + FIELD_PREP(RTL8365MB_CFG0_MAX_LEN_MASK, + frame_size)); +} + +static int rtl8365mb_port_max_mtu(struct dsa_switch *ds, int port) +{ + return RTL8365MB_CFG0_MAX_LEN_MAX - VLAN_ETH_HLEN - ETH_FCS_LEN; +} + static void rtl8365mb_port_stp_state_set(struct dsa_switch *ds, int port, u8 state) { @@ -1980,10 +2011,7 @@ static int rtl8365mb_setup(struct dsa_switch *ds) p->index = i; } - /* Set maximum packet length to 1536 bytes */ - ret = regmap_update_bits(priv->map, RTL8365MB_CFG0_MAX_LEN_REG, - RTL8365MB_CFG0_MAX_LEN_MASK, - FIELD_PREP(RTL8365MB_CFG0_MAX_LEN_MASK, 1536)); + ret = rtl8365mb_port_change_mtu(ds, cpu->trap_port, ETH_DATA_LEN); if (ret) goto out_teardown_irq; @@ -2103,6 +2131,8 @@ static const struct dsa_switch_ops rtl8365mb_switch_ops_smi = { .get_eth_mac_stats = rtl8365mb_get_mac_stats, .get_eth_ctrl_stats = rtl8365mb_get_ctrl_stats, .get_stats64 = rtl8365mb_get_stats64, + .port_change_mtu = rtl8365mb_port_change_mtu, + .port_max_mtu = rtl8365mb_port_max_mtu, }; static const struct dsa_switch_ops rtl8365mb_switch_ops_mdio = { @@ -2124,6 +2154,8 @@ static const struct dsa_switch_ops rtl8365mb_switch_ops_mdio = { .get_eth_mac_stats = rtl8365mb_get_mac_stats, .get_eth_ctrl_stats = rtl8365mb_get_ctrl_stats, .get_stats64 = rtl8365mb_get_stats64, + .port_change_mtu = rtl8365mb_port_change_mtu, + .port_max_mtu = rtl8365mb_port_max_mtu, }; static const struct realtek_ops rtl8365mb_ops = { -- GitLab From 81dc07417f0f82cd8f9733a1a3ebc2a473423e86 Mon Sep 17 00:00:00 2001 From: Mengyuan Lou Date: Wed, 15 Mar 2023 15:43:04 +0800 Subject: [PATCH 0521/2078] net: wangxun: Implement the ndo change mtu interface Add ngbe and txgbe ndo_change_mtu support. Signed-off-by: Mengyuan Lou Signed-off-by: David S. 
Miller --- drivers/net/ethernet/wangxun/libwx/wx_hw.c | 21 ++++++++++++++++++- drivers/net/ethernet/wangxun/libwx/wx_hw.h | 1 + drivers/net/ethernet/wangxun/libwx/wx_type.h | 2 ++ drivers/net/ethernet/wangxun/ngbe/ngbe_main.c | 5 ++++- drivers/net/ethernet/wangxun/ngbe/ngbe_type.h | 1 - .../net/ethernet/wangxun/txgbe/txgbe_main.c | 5 ++++- .../net/ethernet/wangxun/txgbe/txgbe_type.h | 1 - 7 files changed, 31 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index 7db57f934a91e..ca409b4054d06 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -1261,7 +1262,7 @@ static void wx_set_rx_buffer_len(struct wx *wx) struct net_device *netdev = wx->netdev; u32 mhadd, max_frame; - max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; + max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; /* adjust max frame to be at least the size of a standard frame */ if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN)) max_frame = (ETH_FRAME_LEN + ETH_FCS_LEN); @@ -1271,6 +1272,24 @@ static void wx_set_rx_buffer_len(struct wx *wx) wr32(wx, WX_PSR_MAX_SZ, max_frame); } +/** + * wx_change_mtu - Change the Maximum Transfer Unit + * @netdev: network interface device structure + * @new_mtu: new value for maximum frame size + * + * Returns 0 on success, negative on failure + **/ +int wx_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct wx *wx = netdev_priv(netdev); + + netdev->mtu = new_mtu; + wx_set_rx_buffer_len(wx); + + return 0; +} +EXPORT_SYMBOL(wx_change_mtu); + /* Disable the specified rx queue */ void wx_disable_rx_queue(struct wx *wx, struct wx_ring *ring) { diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.h b/drivers/net/ethernet/wangxun/libwx/wx_hw.h index 44dfd6ea442a9..c173c56f0ab5d 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.h @@ -23,6 +23,7 @@ void wx_flush_sw_mac_table(struct wx *wx); int wx_set_mac(struct net_device *netdev, void *p); void wx_disable_rx(struct wx *wx); void wx_set_rx_mode(struct net_device *netdev); +int wx_change_mtu(struct net_device *netdev, int new_mtu); void wx_disable_rx_queue(struct wx *wx, struct wx_ring *ring); void wx_configure(struct wx *wx); int wx_disable_pcie_master(struct wx *wx); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index 77d8d7f1707e9..2b9efd13c500d 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -300,6 +300,8 @@ #define WX_MAX_RXD 8192 #define WX_MAX_TXD 8192 +#define WX_MAX_JUMBO_FRAME_SIZE 9432 /* max payload 9414 */ + /* Supported Rx Buffer Sizes */ #define WX_RXBUFFER_256 256 /* Used for skb receive header */ #define WX_RXBUFFER_2K 2048 diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c index 0e4163e1106f1..1a004aa2adcb4 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "../libwx/wx_type.h" #include "../libwx/wx_hw.h" @@ -469,6 +470,7 @@ static void ngbe_shutdown(struct pci_dev *pdev) static const struct net_device_ops ngbe_netdev_ops = { .ndo_open = ngbe_open, .ndo_stop = ngbe_close, + .ndo_change_mtu = wx_change_mtu, .ndo_start_xmit = wx_xmit_frame, .ndo_set_rx_mode = 
wx_set_rx_mode, .ndo_validate_addr = eth_validate_addr, @@ -560,7 +562,8 @@ static int ngbe_probe(struct pci_dev *pdev, netdev->priv_flags |= IFF_SUPP_NOFCS; netdev->min_mtu = ETH_MIN_MTU; - netdev->max_mtu = NGBE_MAX_JUMBO_FRAME_SIZE - (ETH_HLEN + ETH_FCS_LEN); + netdev->max_mtu = WX_MAX_JUMBO_FRAME_SIZE - + (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN); wx->bd_number = func_nums; /* setup the private structure */ diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h index a2351349785ea..373d5af628cd5 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h @@ -137,7 +137,6 @@ enum NGBE_MSCA_CMD_value { #define NGBE_RX_PB_SIZE 42 #define NGBE_MC_TBL_SIZE 128 #define NGBE_TDB_PB_SZ (20 * 1024) /* 160KB Packet Buffer */ -#define NGBE_MAX_JUMBO_FRAME_SIZE 9432 /* max payload 9414 */ /* TX/RX descriptor defines */ #define NGBE_DEFAULT_TXD 512 /* default ring size */ diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index 859feaafd3500..843a88bc416fe 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "../libwx/wx_type.h" #include "../libwx/wx_lib.h" @@ -486,6 +487,7 @@ static void txgbe_shutdown(struct pci_dev *pdev) static const struct net_device_ops txgbe_netdev_ops = { .ndo_open = txgbe_open, .ndo_stop = txgbe_close, + .ndo_change_mtu = wx_change_mtu, .ndo_start_xmit = wx_xmit_frame, .ndo_set_rx_mode = wx_set_rx_mode, .ndo_validate_addr = eth_validate_addr, @@ -603,7 +605,8 @@ static int txgbe_probe(struct pci_dev *pdev, netdev->priv_flags |= IFF_SUPP_NOFCS; netdev->min_mtu = ETH_MIN_MTU; - netdev->max_mtu = TXGBE_MAX_JUMBO_FRAME_SIZE - (ETH_HLEN + ETH_FCS_LEN); + netdev->max_mtu = WX_MAX_JUMBO_FRAME_SIZE - + (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN); /* make sure the EEPROM is good */ err = txgbe_validate_eeprom_checksum(wx, NULL); diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h index 563ea51deca6b..63a1c733718d3 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h @@ -79,7 +79,6 @@ #define TXGBE_SP_MC_TBL_SIZE 128 #define TXGBE_SP_RX_PB_SIZE 512 #define TXGBE_SP_TDB_PB_SZ (160 * 1024) /* 160KB Packet Buffer */ -#define TXGBE_MAX_JUMBO_FRAME_SIZE 9432 /* max payload 9414 */ /* TX/RX descriptor defines */ #define TXGBE_DEFAULT_TXD 512 -- GitLab From bd7fc6e1957c2102866f9e464c1f2302e891b7e9 Mon Sep 17 00:00:00 2001 From: Shradha Gupta Date: Wed, 15 Mar 2023 04:55:13 -0700 Subject: [PATCH 0522/2078] net: mana: Add new MANA VF performance counters for easier troubleshooting Extended performance counter stats in 'ethtool -S ' output for MANA VF to facilitate troubleshooting. Tested-on: Ubuntu22 Signed-off-by: Shradha Gupta Signed-off-by: David S. 
Miller --- drivers/net/ethernet/microsoft/mana/mana_en.c | 62 ++++++++++++++++++- .../ethernet/microsoft/mana/mana_ethtool.c | 52 +++++++++++++++- include/net/mana/mana.h | 18 ++++++ 3 files changed, 128 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 6120f2b6684fb..492474b4d8aac 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -156,6 +156,7 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) struct mana_txq *txq; struct mana_cq *cq; int err, len; + u16 ihs; if (unlikely(!apc->port_is_up)) goto tx_drop; @@ -166,6 +167,7 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) txq = &apc->tx_qp[txq_idx].txq; gdma_sq = txq->gdma_sq; cq = &apc->tx_qp[txq_idx].tx_cq; + tx_stats = &txq->stats; pkg.tx_oob.s_oob.vcq_num = cq->gdma_id; pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame; @@ -179,10 +181,17 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) pkg.tx_oob.s_oob.pkt_fmt = pkt_fmt; - if (pkt_fmt == MANA_SHORT_PKT_FMT) + if (pkt_fmt == MANA_SHORT_PKT_FMT) { pkg.wqe_req.inline_oob_size = sizeof(struct mana_tx_short_oob); - else + u64_stats_update_begin(&tx_stats->syncp); + tx_stats->short_pkt_fmt++; + u64_stats_update_end(&tx_stats->syncp); + } else { pkg.wqe_req.inline_oob_size = sizeof(struct mana_tx_oob); + u64_stats_update_begin(&tx_stats->syncp); + tx_stats->long_pkt_fmt++; + u64_stats_update_end(&tx_stats->syncp); + } pkg.wqe_req.inline_oob_data = &pkg.tx_oob; pkg.wqe_req.flags = 0; @@ -232,9 +241,35 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); } + + if (skb->encapsulation) { + ihs = skb_inner_tcp_all_headers(skb); + u64_stats_update_begin(&tx_stats->syncp); + tx_stats->tso_inner_packets++; + tx_stats->tso_inner_bytes += skb->len - ihs; + u64_stats_update_end(&tx_stats->syncp); + } else { + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) { + ihs = skb_transport_offset(skb) + sizeof(struct udphdr); + } else { + ihs = skb_tcp_all_headers(skb); + if (ipv6_has_hopopt_jumbo(skb)) + ihs -= sizeof(struct hop_jumbo_hdr); + } + + u64_stats_update_begin(&tx_stats->syncp); + tx_stats->tso_packets++; + tx_stats->tso_bytes += skb->len - ihs; + u64_stats_update_end(&tx_stats->syncp); + } + } else if (skb->ip_summed == CHECKSUM_PARTIAL) { csum_type = mana_checksum_info(skb); + u64_stats_update_begin(&tx_stats->syncp); + tx_stats->csum_partial++; + u64_stats_update_end(&tx_stats->syncp); + if (csum_type == IPPROTO_TCP) { pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4; pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6; @@ -254,8 +289,12 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) } } - if (mana_map_skb(skb, apc, &pkg)) + if (mana_map_skb(skb, apc, &pkg)) { + u64_stats_update_begin(&tx_stats->syncp); + tx_stats->mana_map_err++; + u64_stats_update_end(&tx_stats->syncp); goto free_sgl_ptr; + } skb_queue_tail(&txq->pending_skbs, skb); @@ -1038,6 +1077,8 @@ static void mana_poll_tx_cq(struct mana_cq *cq) if (comp_read < 1) return; + apc->eth_stats.tx_cqes = comp_read; + for (i = 0; i < comp_read; i++) { struct mana_tx_comp_oob *cqe_oob; @@ -1064,6 +1105,7 @@ static void mana_poll_tx_cq(struct mana_cq *cq) case CQE_TX_VLAN_TAGGING_VIOLATION: WARN_ONCE(1, "TX: CQE error %d: ignored.\n", cqe_oob->cqe_hdr.cqe_type); + apc->eth_stats.tx_cqe_err++; break; default: @@ -1072,6 +1114,7 @@ static void 
mana_poll_tx_cq(struct mana_cq *cq) */ WARN_ONCE(1, "TX: Unexpected CQE type %d: HW BUG?\n", cqe_oob->cqe_hdr.cqe_type); + apc->eth_stats.tx_cqe_unknown_type++; return; } @@ -1118,6 +1161,8 @@ static void mana_poll_tx_cq(struct mana_cq *cq) WARN_ON_ONCE(1); cq->work_done = pkt_transmitted; + + apc->eth_stats.tx_cqes -= pkt_transmitted; } static void mana_post_pkt_rxq(struct mana_rxq *rxq) @@ -1252,12 +1297,15 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq, struct gdma_context *gc = rxq->gdma_rq->gdma_dev->gdma_context; struct net_device *ndev = rxq->ndev; struct mana_recv_buf_oob *rxbuf_oob; + struct mana_port_context *apc; struct device *dev = gc->dev; void *new_buf, *old_buf; struct page *new_page; u32 curr, pktlen; dma_addr_t da; + apc = netdev_priv(ndev); + switch (oob->cqe_hdr.cqe_type) { case CQE_RX_OKAY: break; @@ -1270,6 +1318,7 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq, case CQE_RX_COALESCED_4: netdev_err(ndev, "RX coalescing is unsupported\n"); + apc->eth_stats.rx_coalesced_err++; return; case CQE_RX_OBJECT_FENCE: @@ -1279,6 +1328,7 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq, default: netdev_err(ndev, "Unknown RX CQE type = %d\n", oob->cqe_hdr.cqe_type); + apc->eth_stats.rx_cqe_unknown_type++; return; } @@ -1341,11 +1391,15 @@ static void mana_poll_rx_cq(struct mana_cq *cq) { struct gdma_comp *comp = cq->gdma_comp_buf; struct mana_rxq *rxq = cq->rxq; + struct mana_port_context *apc; int comp_read, i; + apc = netdev_priv(rxq->ndev); + comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER); WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER); + apc->eth_stats.rx_cqes = comp_read; rxq->xdp_flush = false; for (i = 0; i < comp_read; i++) { @@ -1357,6 +1411,8 @@ static void mana_poll_rx_cq(struct mana_cq *cq) return; mana_process_rx_cqe(rxq, cq, &comp[i]); + + apc->eth_stats.rx_cqes--; } if (rxq->xdp_flush) diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c index 5b776a33a8177..a64c81410dc14 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c +++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c @@ -13,6 +13,15 @@ static const struct { } mana_eth_stats[] = { {"stop_queue", offsetof(struct mana_ethtool_stats, stop_queue)}, {"wake_queue", offsetof(struct mana_ethtool_stats, wake_queue)}, + {"tx_cqes", offsetof(struct mana_ethtool_stats, tx_cqes)}, + {"tx_cq_err", offsetof(struct mana_ethtool_stats, tx_cqe_err)}, + {"tx_cqe_unknown_type", offsetof(struct mana_ethtool_stats, + tx_cqe_unknown_type)}, + {"rx_cqes", offsetof(struct mana_ethtool_stats, rx_cqes)}, + {"rx_coalesced_err", offsetof(struct mana_ethtool_stats, + rx_coalesced_err)}, + {"rx_cqe_unknown_type", offsetof(struct mana_ethtool_stats, + rx_cqe_unknown_type)}, }; static int mana_get_sset_count(struct net_device *ndev, int stringset) @@ -23,7 +32,8 @@ static int mana_get_sset_count(struct net_device *ndev, int stringset) if (stringset != ETH_SS_STATS) return -EINVAL; - return ARRAY_SIZE(mana_eth_stats) + num_queues * 8; + return ARRAY_SIZE(mana_eth_stats) + num_queues * + (MANA_STATS_RX_COUNT + MANA_STATS_TX_COUNT); } static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data) @@ -61,6 +71,22 @@ static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data) p += ETH_GSTRING_LEN; sprintf(p, "tx_%d_xdp_xmit", i); p += ETH_GSTRING_LEN; + sprintf(p, "tx_%d_tso_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, 
"tx_%d_tso_bytes", i); + p += ETH_GSTRING_LEN; + sprintf(p, "tx_%d_tso_inner_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "tx_%d_tso_inner_bytes", i); + p += ETH_GSTRING_LEN; + sprintf(p, "tx_%d_long_pkt_fmt", i); + p += ETH_GSTRING_LEN; + sprintf(p, "tx_%d_short_pkt_fmt", i); + p += ETH_GSTRING_LEN; + sprintf(p, "tx_%d_csum_partial", i); + p += ETH_GSTRING_LEN; + sprintf(p, "tx_%d_mana_map_err", i); + p += ETH_GSTRING_LEN; } } @@ -78,6 +104,14 @@ static void mana_get_ethtool_stats(struct net_device *ndev, u64 xdp_xmit; u64 xdp_drop; u64 xdp_tx; + u64 tso_packets; + u64 tso_bytes; + u64 tso_inner_packets; + u64 tso_inner_bytes; + u64 long_pkt_fmt; + u64 short_pkt_fmt; + u64 csum_partial; + u64 mana_map_err; int q, i = 0; if (!apc->port_is_up) @@ -113,11 +147,27 @@ static void mana_get_ethtool_stats(struct net_device *ndev, packets = tx_stats->packets; bytes = tx_stats->bytes; xdp_xmit = tx_stats->xdp_xmit; + tso_packets = tx_stats->tso_packets; + tso_bytes = tx_stats->tso_bytes; + tso_inner_packets = tx_stats->tso_inner_packets; + tso_inner_bytes = tx_stats->tso_inner_bytes; + long_pkt_fmt = tx_stats->long_pkt_fmt; + short_pkt_fmt = tx_stats->short_pkt_fmt; + csum_partial = tx_stats->csum_partial; + mana_map_err = tx_stats->mana_map_err; } while (u64_stats_fetch_retry(&tx_stats->syncp, start)); data[i++] = packets; data[i++] = bytes; data[i++] = xdp_xmit; + data[i++] = tso_packets; + data[i++] = tso_bytes; + data[i++] = tso_inner_packets; + data[i++] = tso_inner_bytes; + data[i++] = long_pkt_fmt; + data[i++] = short_pkt_fmt; + data[i++] = csum_partial; + data[i++] = mana_map_err; } } diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 3bb579962a14a..bb11a6535d806 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -48,6 +48,10 @@ enum TRI_STATE { #define MAX_PORTS_IN_MANA_DEV 256 +/* Update this count whenever the respective structures are changed */ +#define MANA_STATS_RX_COUNT 5 +#define MANA_STATS_TX_COUNT 11 + struct mana_stats_rx { u64 packets; u64 bytes; @@ -61,6 +65,14 @@ struct mana_stats_tx { u64 packets; u64 bytes; u64 xdp_xmit; + u64 tso_packets; + u64 tso_bytes; + u64 tso_inner_packets; + u64 tso_inner_bytes; + u64 short_pkt_fmt; + u64 long_pkt_fmt; + u64 csum_partial; + u64 mana_map_err; struct u64_stats_sync syncp; }; @@ -331,6 +343,12 @@ struct mana_tx_qp { struct mana_ethtool_stats { u64 stop_queue; u64 wake_queue; + u64 tx_cqes; + u64 tx_cqe_err; + u64 tx_cqe_unknown_type; + u64 rx_cqes; + u64 rx_coalesced_err; + u64 rx_cqe_unknown_type; }; struct mana_context { -- GitLab From 40235edeadf58e4232bfcf8bf15be453cfe233b7 Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli Date: Wed, 15 Mar 2023 13:29:47 +0530 Subject: [PATCH 0523/2078] dt-bindings: net: ti: k3-am654-cpsw-nuss: Fix compatible order Reorder compatibles to follow alphanumeric order. Signed-off-by: Siddharth Vadapalli Signed-off-by: David S. 
Miller --- .../devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml index 628d63e1eb1f0..6f56add1919b6 100644 --- a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml +++ b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml @@ -54,11 +54,11 @@ properties: compatible: enum: + - ti,am642-cpsw-nuss - ti,am654-cpsw-nuss - ti,j7200-cpswxg-nuss - ti,j721e-cpsw-nuss - ti,j721e-cpswxg-nuss - - ti,am642-cpsw-nuss reg: maxItems: 1 @@ -215,8 +215,8 @@ allOf: compatible: contains: enum: - - ti,j721e-cpswxg-nuss - ti,j7200-cpswxg-nuss + - ti,j721e-cpswxg-nuss then: properties: ethernet-ports: -- GitLab From e0c9c2a7dd738120c2fbc155c6fba1066f109be0 Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli Date: Wed, 15 Mar 2023 13:29:48 +0530 Subject: [PATCH 0524/2078] dt-bindings: net: ti: k3-am654-cpsw-nuss: Add J784S4 CPSW9G support Update bindings for TI K3 J784S4 SoC which contains 9 ports (8 external ports) CPSW9G module and add compatible for it. Signed-off-by: Siddharth Vadapalli Signed-off-by: David S. Miller --- .../devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml index 6f56add1919b6..306709bcc9e90 100644 --- a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml +++ b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml @@ -59,6 +59,7 @@ properties: - ti,j7200-cpswxg-nuss - ti,j721e-cpsw-nuss - ti,j721e-cpswxg-nuss + - ti,j784s4-cpswxg-nuss reg: maxItems: 1 @@ -197,7 +198,9 @@ allOf: properties: compatible: contains: - const: ti,j721e-cpswxg-nuss + enum: + - ti,j721e-cpswxg-nuss + - ti,j784s4-cpswxg-nuss then: properties: ethernet-ports: @@ -217,6 +220,7 @@ allOf: enum: - ti,j7200-cpswxg-nuss - ti,j721e-cpswxg-nuss + - ti,j784s4-cpswxg-nuss then: properties: ethernet-ports: -- GitLab From 8c44fa12c8fa09c6c12f0dc25129a6d13ee0a1ea Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 15 Mar 2023 15:11:45 +0200 Subject: [PATCH 0525/2078] net: Add MDB net device operations Add MDB net device operations that will be invoked by rtnetlink code in response to received RTM_{NEW,DEL,GET}MDB messages. Subsequent patches will implement these operations in the bridge and VXLAN drivers. Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/netdevice.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ee483071cf599..23b0d7eaaadd8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1307,6 +1307,17 @@ struct netdev_net_notifier { * Used to add FDB entries to dump requests. Implementers should add * entries to skb and update idx with the number of entries. * + * int (*ndo_mdb_add)(struct net_device *dev, struct nlattr *tb[], + * u16 nlmsg_flags, struct netlink_ext_ack *extack); + * Adds an MDB entry to dev. + * int (*ndo_mdb_del)(struct net_device *dev, struct nlattr *tb[], + * struct netlink_ext_ack *extack); + * Deletes the MDB entry from dev. + * int (*ndo_mdb_dump)(struct net_device *dev, struct sk_buff *skb, + * struct netlink_callback *cb); + * Dumps MDB entries from dev. 
The first argument (marker) in the netlink + * callback is used by core rtnetlink code. + * * int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh, * u16 flags, struct netlink_ext_ack *extack) * int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, @@ -1569,6 +1580,16 @@ struct net_device_ops { const unsigned char *addr, u16 vid, u32 portid, u32 seq, struct netlink_ext_ack *extack); + int (*ndo_mdb_add)(struct net_device *dev, + struct nlattr *tb[], + u16 nlmsg_flags, + struct netlink_ext_ack *extack); + int (*ndo_mdb_del)(struct net_device *dev, + struct nlattr *tb[], + struct netlink_ext_ack *extack); + int (*ndo_mdb_dump)(struct net_device *dev, + struct sk_buff *skb, + struct netlink_callback *cb); int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh, u16 flags, -- GitLab From c009de1061b57162fed206998dcdf8001416a481 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 15 Mar 2023 15:11:46 +0200 Subject: [PATCH 0526/2078] bridge: mcast: Implement MDB net device operations Implement the previously added MDB net device operations in the bridge driver so that they could be invoked by core rtnetlink code in the next patch. The operations are identical to the existing br_mdb_{dump,add,del} functions. The '_new' suffix will be removed in the next patch. The functions are re-implemented in this patch to make the conversion in the next patch easier to review. Add dummy implementations when 'CONFIG_BRIDGE_IGMP_SNOOPING' is disabled, so that an error will be returned to user space when it is trying to add or delete an MDB entry. This is consistent with existing behavior where the bridge driver does not even register rtnetlink handlers for RTM_{NEW,DEL,GET}MDB messages when this Kconfig option is disabled. Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- net/bridge/br_device.c | 3 + net/bridge/br_mdb.c | 124 ++++++++++++++++++++++++++++++++++++++++ net/bridge/br_private.h | 25 ++++++++ 3 files changed, 152 insertions(+) diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index b82906fc999a3..85fa4d73bb53b 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -468,6 +468,9 @@ static const struct net_device_ops br_netdev_ops = { .ndo_fdb_del_bulk = br_fdb_delete_bulk, .ndo_fdb_dump = br_fdb_dump, .ndo_fdb_get = br_fdb_get, + .ndo_mdb_add = br_mdb_add_new, + .ndo_mdb_del = br_mdb_del_new, + .ndo_mdb_dump = br_mdb_dump_new, .ndo_bridge_getlink = br_getlink, .ndo_bridge_setlink = br_setlink, .ndo_bridge_dellink = br_dellink, diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 25c48d81a5972..cb8270a5480b7 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -458,6 +458,39 @@ out: return skb->len; } +int br_mdb_dump_new(struct net_device *dev, struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct net_bridge *br = netdev_priv(dev); + struct br_port_msg *bpm; + struct nlmsghdr *nlh; + int err; + + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, RTM_GETMDB, sizeof(*bpm), + NLM_F_MULTI); + if (!nlh) + return -EMSGSIZE; + + bpm = nlmsg_data(nlh); + memset(bpm, 0, sizeof(*bpm)); + bpm->ifindex = dev->ifindex; + + rcu_read_lock(); + + err = br_mdb_fill_info(skb, cb, dev); + if (err) + goto out; + err = br_rports_fill_info(skb, &br->multicast_ctx); + if (err) + goto out; + +out: + rcu_read_unlock(); + nlmsg_end(skb, nlh); + return err; +} + static int nlmsg_populate_mdb_fill(struct sk_buff *skb, struct net_device *dev, struct net_bridge_mdb_entry *mp, @@ -1459,6 +1492,65 @@ out: return err; } +int br_mdb_add_new(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags, + struct netlink_ext_ack *extack) +{ + struct net_bridge_vlan_group *vg; + struct br_mdb_config cfg = {}; + struct net_bridge_vlan *v; + int err; + + /* Configuration structure will be initialized here. */ + + err = -EINVAL; + /* host join errors which can happen before creating the group */ + if (!cfg.p && !br_group_is_l2(&cfg.group)) { + /* don't allow any flags for host-joined IP groups */ + if (cfg.entry->state) { + NL_SET_ERR_MSG_MOD(extack, "Flags are not allowed for host groups"); + goto out; + } + if (!br_multicast_is_star_g(&cfg.group)) { + NL_SET_ERR_MSG_MOD(extack, "Groups with sources cannot be manually host joined"); + goto out; + } + } + + if (br_group_is_l2(&cfg.group) && cfg.entry->state != MDB_PERMANENT) { + NL_SET_ERR_MSG_MOD(extack, "Only permanent L2 entries allowed"); + goto out; + } + + if (cfg.p) { + if (cfg.p->state == BR_STATE_DISABLED && cfg.entry->state != MDB_PERMANENT) { + NL_SET_ERR_MSG_MOD(extack, "Port is in disabled state and entry is not permanent"); + goto out; + } + vg = nbp_vlan_group(cfg.p); + } else { + vg = br_vlan_group(cfg.br); + } + + /* If vlan filtering is enabled and VLAN is not specified + * install mdb entry on all vlans configured on the port. 
+ */ + if (br_vlan_enabled(cfg.br->dev) && vg && cfg.entry->vid == 0) { + list_for_each_entry(v, &vg->vlan_list, vlist) { + cfg.entry->vid = v->vid; + cfg.group.vid = v->vid; + err = __br_mdb_add(&cfg, extack); + if (err) + break; + } + } else { + err = __br_mdb_add(&cfg, extack); + } + +out: + br_mdb_config_fini(&cfg); + return err; +} + static int __br_mdb_del(const struct br_mdb_config *cfg) { struct br_mdb_entry *entry = cfg->entry; @@ -1535,6 +1627,38 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, return err; } +int br_mdb_del_new(struct net_device *dev, struct nlattr *tb[], + struct netlink_ext_ack *extack) +{ + struct net_bridge_vlan_group *vg; + struct br_mdb_config cfg = {}; + struct net_bridge_vlan *v; + int err = 0; + + /* Configuration structure will be initialized here. */ + + if (cfg.p) + vg = nbp_vlan_group(cfg.p); + else + vg = br_vlan_group(cfg.br); + + /* If vlan filtering is enabled and VLAN is not specified + * delete mdb entry on all vlans configured on the port. + */ + if (br_vlan_enabled(cfg.br->dev) && vg && cfg.entry->vid == 0) { + list_for_each_entry(v, &vg->vlan_list, vlist) { + cfg.entry->vid = v->vid; + cfg.group.vid = v->vid; + err = __br_mdb_del(&cfg); + } + } else { + err = __br_mdb_del(&cfg); + } + + br_mdb_config_fini(&cfg); + return err; +} + void br_mdb_init(void) { rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, 0); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index cef5f6ea850c7..a72847c1dc9f1 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -981,6 +981,12 @@ void br_multicast_get_stats(const struct net_bridge *br, u32 br_multicast_ngroups_get(const struct net_bridge_mcast_port *pmctx); void br_multicast_ngroups_set_max(struct net_bridge_mcast_port *pmctx, u32 max); u32 br_multicast_ngroups_get_max(const struct net_bridge_mcast_port *pmctx); +int br_mdb_add_new(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags, + struct netlink_ext_ack *extack); +int br_mdb_del_new(struct net_device *dev, struct nlattr *tb[], + struct netlink_ext_ack *extack); +int br_mdb_dump_new(struct net_device *dev, struct sk_buff *skb, + struct netlink_callback *cb); void br_mdb_init(void); void br_mdb_uninit(void); void br_multicast_host_join(const struct net_bridge_mcast *brmctx, @@ -1374,6 +1380,25 @@ static inline bool br_multicast_querier_exists(struct net_bridge_mcast *brmctx, return false; } +static inline int br_mdb_add_new(struct net_device *dev, struct nlattr *tb[], + u16 nlmsg_flags, + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} + +static inline int br_mdb_del_new(struct net_device *dev, struct nlattr *tb[], + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} + +static inline int br_mdb_dump_new(struct net_device *dev, struct sk_buff *skb, + struct netlink_callback *cb) +{ + return 0; +} + static inline void br_mdb_init(void) { } -- GitLab From cc7f5022f810cf7bf4f1826dd620656c35942a13 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 15 Mar 2023 15:11:47 +0200 Subject: [PATCH 0527/2078] rtnetlink: bridge: mcast: Move MDB handlers out of bridge driver Currently, the bridge driver registers handlers for MDB netlink messages, making it impossible for other drivers to implement MDB support. As a preparation for VXLAN MDB support, move the MDB handlers out of the bridge driver to the core rtnetlink code. The rtnetlink code will call into individual drivers by invoking their previously added MDB net device operations. 
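For a driver that wants to opt in to this dispatch model, MDB support reduces to filling in the three operations added earlier in the series. A minimal sketch (hypothetical "foo" driver, shown here only for illustration and not part of this series) of the wiring rtnetlink relies on:

#include <linux/netdevice.h>
#include <linux/if_bridge.h>

static int foo_mdb_add(struct net_device *dev, struct nlattr *tb[],
		       u16 nlmsg_flags, struct netlink_ext_ack *extack)
{
	/* tb[MDBA_SET_ENTRY] carries the struct br_mdb_entry that was
	 * parsed and validated against the common policy by rtnetlink;
	 * program it into device-specific state here.
	 */
	return 0;
}

static int foo_mdb_del(struct net_device *dev, struct nlattr *tb[],
		       struct netlink_ext_ack *extack)
{
	/* remove the entry described by tb[MDBA_SET_ENTRY] */
	return 0;
}

static int foo_mdb_dump(struct net_device *dev, struct sk_buff *skb,
			struct netlink_callback *cb)
{
	/* fill skb with RTM_NEWMDB records for this device */
	return 0;
}

static const struct net_device_ops foo_netdev_ops = {
	.ndo_mdb_add	= foo_mdb_add,
	.ndo_mdb_del	= foo_mdb_del,
	.ndo_mdb_dump	= foo_mdb_dump,
};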
Note that while the diffstat is large, the change is mechanical. It moves code out of the bridge driver to rtnetlink code. Also note that a similar change was made in 2012 with commit 77162022ab26 ("net: add generic PF_BRIDGE:RTM_ FDB hooks") that moved FDB handlers out of the bridge driver to the core rtnetlink code. Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_device.c | 6 +- net/bridge/br_mdb.c | 301 ++-------------------------------------- net/bridge/br_netlink.c | 3 - net/bridge/br_private.h | 35 ++--- net/core/rtnetlink.c | 217 +++++++++++++++++++++++++++++ 5 files changed, 244 insertions(+), 318 deletions(-) diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 85fa4d73bb53b..df47c876230ee 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -468,9 +468,9 @@ static const struct net_device_ops br_netdev_ops = { .ndo_fdb_del_bulk = br_fdb_delete_bulk, .ndo_fdb_dump = br_fdb_dump, .ndo_fdb_get = br_fdb_get, - .ndo_mdb_add = br_mdb_add_new, - .ndo_mdb_del = br_mdb_del_new, - .ndo_mdb_dump = br_mdb_dump_new, + .ndo_mdb_add = br_mdb_add, + .ndo_mdb_del = br_mdb_del, + .ndo_mdb_dump = br_mdb_dump, .ndo_bridge_getlink = br_getlink, .ndo_bridge_setlink = br_setlink, .ndo_bridge_dellink = br_dellink, diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index cb8270a5480b7..76636c61db21f 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -380,86 +380,8 @@ out: return err; } -static int br_mdb_valid_dump_req(const struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) -{ - struct br_port_msg *bpm; - - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*bpm))) { - NL_SET_ERR_MSG_MOD(extack, "Invalid header for mdb dump request"); - return -EINVAL; - } - - bpm = nlmsg_data(nlh); - if (bpm->ifindex) { - NL_SET_ERR_MSG_MOD(extack, "Filtering by device index is not supported for mdb dump request"); - return -EINVAL; - } - if (nlmsg_attrlen(nlh, sizeof(*bpm))) { - NL_SET_ERR_MSG(extack, "Invalid data after header in mdb dump request"); - return -EINVAL; - } - - return 0; -} - -static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb) -{ - struct net_device *dev; - struct net *net = sock_net(skb->sk); - struct nlmsghdr *nlh = NULL; - int idx = 0, s_idx; - - if (cb->strict_check) { - int err = br_mdb_valid_dump_req(cb->nlh, cb->extack); - - if (err < 0) - return err; - } - - s_idx = cb->args[0]; - - rcu_read_lock(); - - for_each_netdev_rcu(net, dev) { - if (netif_is_bridge_master(dev)) { - struct net_bridge *br = netdev_priv(dev); - struct br_port_msg *bpm; - - if (idx < s_idx) - goto skip; - - nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, RTM_GETMDB, - sizeof(*bpm), NLM_F_MULTI); - if (nlh == NULL) - break; - - bpm = nlmsg_data(nlh); - memset(bpm, 0, sizeof(*bpm)); - bpm->ifindex = dev->ifindex; - if (br_mdb_fill_info(skb, cb, dev) < 0) - goto out; - if (br_rports_fill_info(skb, &br->multicast_ctx) < 0) - goto out; - - cb->args[1] = 0; - nlmsg_end(skb, nlh); - skip: - idx++; - } - } - -out: - if (nlh) - nlmsg_end(skb, nlh); - rcu_read_unlock(); - cb->args[0] = idx; - return skb->len; -} - -int br_mdb_dump_new(struct net_device *dev, struct sk_buff *skb, - struct netlink_callback *cb) +int br_mdb_dump(struct net_device *dev, struct sk_buff *skb, + struct netlink_callback *cb) { struct net_bridge *br = netdev_priv(dev); struct br_port_msg *bpm; @@ -716,60 +638,6 @@ static const struct nla_policy br_mdbe_attrs_pol[MDBE_ATTR_MAX + 1] = { [MDBE_ATTR_RTPROT] = 
NLA_POLICY_MIN(NLA_U8, RTPROT_STATIC), }; -static int validate_mdb_entry(const struct nlattr *attr, - struct netlink_ext_ack *extack) -{ - struct br_mdb_entry *entry = nla_data(attr); - - if (nla_len(attr) != sizeof(struct br_mdb_entry)) { - NL_SET_ERR_MSG_MOD(extack, "Invalid MDBA_SET_ENTRY attribute length"); - return -EINVAL; - } - - if (entry->ifindex == 0) { - NL_SET_ERR_MSG_MOD(extack, "Zero entry ifindex is not allowed"); - return -EINVAL; - } - - if (entry->addr.proto == htons(ETH_P_IP)) { - if (!ipv4_is_multicast(entry->addr.u.ip4)) { - NL_SET_ERR_MSG_MOD(extack, "IPv4 entry group address is not multicast"); - return -EINVAL; - } - if (ipv4_is_local_multicast(entry->addr.u.ip4)) { - NL_SET_ERR_MSG_MOD(extack, "IPv4 entry group address is local multicast"); - return -EINVAL; - } -#if IS_ENABLED(CONFIG_IPV6) - } else if (entry->addr.proto == htons(ETH_P_IPV6)) { - if (ipv6_addr_is_ll_all_nodes(&entry->addr.u.ip6)) { - NL_SET_ERR_MSG_MOD(extack, "IPv6 entry group address is link-local all nodes"); - return -EINVAL; - } -#endif - } else if (entry->addr.proto == 0) { - /* L2 mdb */ - if (!is_multicast_ether_addr(entry->addr.u.mac_addr)) { - NL_SET_ERR_MSG_MOD(extack, "L2 entry group is not multicast"); - return -EINVAL; - } - } else { - NL_SET_ERR_MSG_MOD(extack, "Unknown entry protocol"); - return -EINVAL; - } - - if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY) { - NL_SET_ERR_MSG_MOD(extack, "Unknown entry state"); - return -EINVAL; - } - if (entry->vid >= VLAN_VID_MASK) { - NL_SET_ERR_MSG_MOD(extack, "Invalid entry VLAN id"); - return -EINVAL; - } - - return 0; -} - static bool is_valid_mdb_source(struct nlattr *attr, __be16 proto, struct netlink_ext_ack *extack) { @@ -1332,49 +1200,16 @@ static int br_mdb_config_attrs_init(struct nlattr *set_attrs, return 0; } -static const struct nla_policy mdba_policy[MDBA_SET_ENTRY_MAX + 1] = { - [MDBA_SET_ENTRY_UNSPEC] = { .strict_start_type = MDBA_SET_ENTRY_ATTRS + 1 }, - [MDBA_SET_ENTRY] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, - validate_mdb_entry, - sizeof(struct br_mdb_entry)), - [MDBA_SET_ENTRY_ATTRS] = { .type = NLA_NESTED }, -}; - -static int br_mdb_config_init(struct net *net, const struct nlmsghdr *nlh, - struct br_mdb_config *cfg, +static int br_mdb_config_init(struct br_mdb_config *cfg, struct net_device *dev, + struct nlattr *tb[], u16 nlmsg_flags, struct netlink_ext_ack *extack) { - struct nlattr *tb[MDBA_SET_ENTRY_MAX + 1]; - struct br_port_msg *bpm; - struct net_device *dev; - int err; - - err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb, - MDBA_SET_ENTRY_MAX, mdba_policy, extack); - if (err) - return err; + struct net *net = dev_net(dev); memset(cfg, 0, sizeof(*cfg)); cfg->filter_mode = MCAST_EXCLUDE; cfg->rt_protocol = RTPROT_STATIC; - cfg->nlflags = nlh->nlmsg_flags; - - bpm = nlmsg_data(nlh); - if (!bpm->ifindex) { - NL_SET_ERR_MSG_MOD(extack, "Invalid bridge ifindex"); - return -EINVAL; - } - - dev = __dev_get_by_index(net, bpm->ifindex); - if (!dev) { - NL_SET_ERR_MSG_MOD(extack, "Bridge device doesn't exist"); - return -ENODEV; - } - - if (!netif_is_bridge_master(dev)) { - NL_SET_ERR_MSG_MOD(extack, "Device is not a bridge"); - return -EOPNOTSUPP; - } + cfg->nlflags = nlmsg_flags; cfg->br = netdev_priv(dev); @@ -1388,11 +1223,6 @@ static int br_mdb_config_init(struct net *net, const struct nlmsghdr *nlh, return -EINVAL; } - if (NL_REQ_ATTR_CHECK(extack, NULL, tb, MDBA_SET_ENTRY)) { - NL_SET_ERR_MSG_MOD(extack, "Missing MDBA_SET_ENTRY attribute"); - return -EINVAL; - } - cfg->entry = 
nla_data(tb[MDBA_SET_ENTRY]); if (cfg->entry->ifindex != cfg->br->dev->ifindex) { @@ -1430,16 +1260,15 @@ static void br_mdb_config_fini(struct br_mdb_config *cfg) br_mdb_config_src_list_fini(cfg); } -static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) +int br_mdb_add(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags, + struct netlink_ext_ack *extack) { - struct net *net = sock_net(skb->sk); struct net_bridge_vlan_group *vg; struct net_bridge_vlan *v; struct br_mdb_config cfg; int err; - err = br_mdb_config_init(net, nlh, &cfg, extack); + err = br_mdb_config_init(&cfg, dev, tb, nlmsg_flags, extack); if (err) return err; @@ -1492,65 +1321,6 @@ out: return err; } -int br_mdb_add_new(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags, - struct netlink_ext_ack *extack) -{ - struct net_bridge_vlan_group *vg; - struct br_mdb_config cfg = {}; - struct net_bridge_vlan *v; - int err; - - /* Configuration structure will be initialized here. */ - - err = -EINVAL; - /* host join errors which can happen before creating the group */ - if (!cfg.p && !br_group_is_l2(&cfg.group)) { - /* don't allow any flags for host-joined IP groups */ - if (cfg.entry->state) { - NL_SET_ERR_MSG_MOD(extack, "Flags are not allowed for host groups"); - goto out; - } - if (!br_multicast_is_star_g(&cfg.group)) { - NL_SET_ERR_MSG_MOD(extack, "Groups with sources cannot be manually host joined"); - goto out; - } - } - - if (br_group_is_l2(&cfg.group) && cfg.entry->state != MDB_PERMANENT) { - NL_SET_ERR_MSG_MOD(extack, "Only permanent L2 entries allowed"); - goto out; - } - - if (cfg.p) { - if (cfg.p->state == BR_STATE_DISABLED && cfg.entry->state != MDB_PERMANENT) { - NL_SET_ERR_MSG_MOD(extack, "Port is in disabled state and entry is not permanent"); - goto out; - } - vg = nbp_vlan_group(cfg.p); - } else { - vg = br_vlan_group(cfg.br); - } - - /* If vlan filtering is enabled and VLAN is not specified - * install mdb entry on all vlans configured on the port. - */ - if (br_vlan_enabled(cfg.br->dev) && vg && cfg.entry->vid == 0) { - list_for_each_entry(v, &vg->vlan_list, vlist) { - cfg.entry->vid = v->vid; - cfg.group.vid = v->vid; - err = __br_mdb_add(&cfg, extack); - if (err) - break; - } - } else { - err = __br_mdb_add(&cfg, extack); - } - -out: - br_mdb_config_fini(&cfg); - return err; -} - static int __br_mdb_del(const struct br_mdb_config *cfg) { struct br_mdb_entry *entry = cfg->entry; @@ -1592,16 +1362,15 @@ unlock: return err; } -static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) +int br_mdb_del(struct net_device *dev, struct nlattr *tb[], + struct netlink_ext_ack *extack) { - struct net *net = sock_net(skb->sk); struct net_bridge_vlan_group *vg; struct net_bridge_vlan *v; struct br_mdb_config cfg; int err; - err = br_mdb_config_init(net, nlh, &cfg, extack); + err = br_mdb_config_init(&cfg, dev, tb, 0, extack); if (err) return err; @@ -1626,49 +1395,3 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, br_mdb_config_fini(&cfg); return err; } - -int br_mdb_del_new(struct net_device *dev, struct nlattr *tb[], - struct netlink_ext_ack *extack) -{ - struct net_bridge_vlan_group *vg; - struct br_mdb_config cfg = {}; - struct net_bridge_vlan *v; - int err = 0; - - /* Configuration structure will be initialized here. 
*/ - - if (cfg.p) - vg = nbp_vlan_group(cfg.p); - else - vg = br_vlan_group(cfg.br); - - /* If vlan filtering is enabled and VLAN is not specified - * delete mdb entry on all vlans configured on the port. - */ - if (br_vlan_enabled(cfg.br->dev) && vg && cfg.entry->vid == 0) { - list_for_each_entry(v, &vg->vlan_list, vlist) { - cfg.entry->vid = v->vid; - cfg.group.vid = v->vid; - err = __br_mdb_del(&cfg); - } - } else { - err = __br_mdb_del(&cfg); - } - - br_mdb_config_fini(&cfg); - return err; -} - -void br_mdb_init(void) -{ - rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, 0); - rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, 0); - rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, 0); -} - -void br_mdb_uninit(void) -{ - rtnl_unregister(PF_BRIDGE, RTM_GETMDB); - rtnl_unregister(PF_BRIDGE, RTM_NEWMDB); - rtnl_unregister(PF_BRIDGE, RTM_DELMDB); -} diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 9173e52b89e21..fefb1c0e248b4 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1886,7 +1886,6 @@ int __init br_netlink_init(void) { int err; - br_mdb_init(); br_vlan_rtnl_init(); rtnl_af_register(&br_af_ops); @@ -1898,13 +1897,11 @@ int __init br_netlink_init(void) out_af: rtnl_af_unregister(&br_af_ops); - br_mdb_uninit(); return err; } void br_netlink_fini(void) { - br_mdb_uninit(); br_vlan_rtnl_uninit(); rtnl_af_unregister(&br_af_ops); rtnl_link_unregister(&br_link_ops); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index a72847c1dc9f1..7264fd40f82f6 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -981,14 +981,12 @@ void br_multicast_get_stats(const struct net_bridge *br, u32 br_multicast_ngroups_get(const struct net_bridge_mcast_port *pmctx); void br_multicast_ngroups_set_max(struct net_bridge_mcast_port *pmctx, u32 max); u32 br_multicast_ngroups_get_max(const struct net_bridge_mcast_port *pmctx); -int br_mdb_add_new(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags, - struct netlink_ext_ack *extack); -int br_mdb_del_new(struct net_device *dev, struct nlattr *tb[], - struct netlink_ext_ack *extack); -int br_mdb_dump_new(struct net_device *dev, struct sk_buff *skb, - struct netlink_callback *cb); -void br_mdb_init(void); -void br_mdb_uninit(void); +int br_mdb_add(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags, + struct netlink_ext_ack *extack); +int br_mdb_del(struct net_device *dev, struct nlattr *tb[], + struct netlink_ext_ack *extack); +int br_mdb_dump(struct net_device *dev, struct sk_buff *skb, + struct netlink_callback *cb); void br_multicast_host_join(const struct net_bridge_mcast *brmctx, struct net_bridge_mdb_entry *mp, bool notify); void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify); @@ -1380,33 +1378,24 @@ static inline bool br_multicast_querier_exists(struct net_bridge_mcast *brmctx, return false; } -static inline int br_mdb_add_new(struct net_device *dev, struct nlattr *tb[], - u16 nlmsg_flags, - struct netlink_ext_ack *extack) +static inline int br_mdb_add(struct net_device *dev, struct nlattr *tb[], + u16 nlmsg_flags, struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } -static inline int br_mdb_del_new(struct net_device *dev, struct nlattr *tb[], - struct netlink_ext_ack *extack) +static inline int br_mdb_del(struct net_device *dev, struct nlattr *tb[], + struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } -static inline int br_mdb_dump_new(struct net_device *dev, struct 
sk_buff *skb, - struct netlink_callback *cb) +static inline int br_mdb_dump(struct net_device *dev, struct sk_buff *skb, + struct netlink_callback *cb) { return 0; } -static inline void br_mdb_init(void) -{ -} - -static inline void br_mdb_uninit(void) -{ -} - static inline int br_mdb_hash_init(struct net_bridge *br) { return 0; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 5d8eb57867a96..f347d9fa78c7b 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -54,6 +54,9 @@ #include #include #include +#if IS_ENABLED(CONFIG_IPV6) +#include +#endif #include "dev.h" @@ -6063,6 +6066,216 @@ static int rtnl_stats_set(struct sk_buff *skb, struct nlmsghdr *nlh, return 0; } +static int rtnl_mdb_valid_dump_req(const struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct br_port_msg *bpm; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*bpm))) { + NL_SET_ERR_MSG(extack, "Invalid header for mdb dump request"); + return -EINVAL; + } + + bpm = nlmsg_data(nlh); + if (bpm->ifindex) { + NL_SET_ERR_MSG(extack, "Filtering by device index is not supported for mdb dump request"); + return -EINVAL; + } + if (nlmsg_attrlen(nlh, sizeof(*bpm))) { + NL_SET_ERR_MSG(extack, "Invalid data after header in mdb dump request"); + return -EINVAL; + } + + return 0; +} + +struct rtnl_mdb_dump_ctx { + long idx; +}; + +static int rtnl_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct rtnl_mdb_dump_ctx *ctx = (void *)cb->ctx; + struct net *net = sock_net(skb->sk); + struct net_device *dev; + int idx, s_idx; + int err; + + NL_ASSERT_DUMP_CTX_FITS(struct rtnl_mdb_dump_ctx); + + if (cb->strict_check) { + err = rtnl_mdb_valid_dump_req(cb->nlh, cb->extack); + if (err) + return err; + } + + s_idx = ctx->idx; + idx = 0; + + for_each_netdev(net, dev) { + if (idx < s_idx) + goto skip; + if (!dev->netdev_ops->ndo_mdb_dump) + goto skip; + + err = dev->netdev_ops->ndo_mdb_dump(dev, skb, cb); + if (err == -EMSGSIZE) + goto out; + /* Moving on to next device, reset markers and sequence + * counters since they are all maintained per-device. 
+ */ + memset(cb->ctx, 0, sizeof(cb->ctx)); + cb->prev_seq = 0; + cb->seq = 0; +skip: + idx++; + } + +out: + ctx->idx = idx; + return skb->len; +} + +static int rtnl_validate_mdb_entry(const struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct br_mdb_entry *entry = nla_data(attr); + + if (nla_len(attr) != sizeof(struct br_mdb_entry)) { + NL_SET_ERR_MSG_ATTR(extack, attr, "Invalid attribute length"); + return -EINVAL; + } + + if (entry->ifindex == 0) { + NL_SET_ERR_MSG(extack, "Zero entry ifindex is not allowed"); + return -EINVAL; + } + + if (entry->addr.proto == htons(ETH_P_IP)) { + if (!ipv4_is_multicast(entry->addr.u.ip4)) { + NL_SET_ERR_MSG(extack, "IPv4 entry group address is not multicast"); + return -EINVAL; + } + if (ipv4_is_local_multicast(entry->addr.u.ip4)) { + NL_SET_ERR_MSG(extack, "IPv4 entry group address is local multicast"); + return -EINVAL; + } +#if IS_ENABLED(CONFIG_IPV6) + } else if (entry->addr.proto == htons(ETH_P_IPV6)) { + if (ipv6_addr_is_ll_all_nodes(&entry->addr.u.ip6)) { + NL_SET_ERR_MSG(extack, "IPv6 entry group address is link-local all nodes"); + return -EINVAL; + } +#endif + } else if (entry->addr.proto == 0) { + /* L2 mdb */ + if (!is_multicast_ether_addr(entry->addr.u.mac_addr)) { + NL_SET_ERR_MSG(extack, "L2 entry group is not multicast"); + return -EINVAL; + } + } else { + NL_SET_ERR_MSG(extack, "Unknown entry protocol"); + return -EINVAL; + } + + if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY) { + NL_SET_ERR_MSG(extack, "Unknown entry state"); + return -EINVAL; + } + if (entry->vid >= VLAN_VID_MASK) { + NL_SET_ERR_MSG(extack, "Invalid entry VLAN id"); + return -EINVAL; + } + + return 0; +} + +static const struct nla_policy mdba_policy[MDBA_SET_ENTRY_MAX + 1] = { + [MDBA_SET_ENTRY_UNSPEC] = { .strict_start_type = MDBA_SET_ENTRY_ATTRS + 1 }, + [MDBA_SET_ENTRY] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, + rtnl_validate_mdb_entry, + sizeof(struct br_mdb_entry)), + [MDBA_SET_ENTRY_ATTRS] = { .type = NLA_NESTED }, +}; + +static int rtnl_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[MDBA_SET_ENTRY_MAX + 1]; + struct net *net = sock_net(skb->sk); + struct br_port_msg *bpm; + struct net_device *dev; + int err; + + err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb, + MDBA_SET_ENTRY_MAX, mdba_policy, extack); + if (err) + return err; + + bpm = nlmsg_data(nlh); + if (!bpm->ifindex) { + NL_SET_ERR_MSG(extack, "Invalid ifindex"); + return -EINVAL; + } + + dev = __dev_get_by_index(net, bpm->ifindex); + if (!dev) { + NL_SET_ERR_MSG(extack, "Device doesn't exist"); + return -ENODEV; + } + + if (NL_REQ_ATTR_CHECK(extack, NULL, tb, MDBA_SET_ENTRY)) { + NL_SET_ERR_MSG(extack, "Missing MDBA_SET_ENTRY attribute"); + return -EINVAL; + } + + if (!dev->netdev_ops->ndo_mdb_add) { + NL_SET_ERR_MSG(extack, "Device does not support MDB operations"); + return -EOPNOTSUPP; + } + + return dev->netdev_ops->ndo_mdb_add(dev, tb, nlh->nlmsg_flags, extack); +} + +static int rtnl_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[MDBA_SET_ENTRY_MAX + 1]; + struct net *net = sock_net(skb->sk); + struct br_port_msg *bpm; + struct net_device *dev; + int err; + + err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb, + MDBA_SET_ENTRY_MAX, mdba_policy, extack); + if (err) + return err; + + bpm = nlmsg_data(nlh); + if (!bpm->ifindex) { + NL_SET_ERR_MSG(extack, "Invalid ifindex"); + return -EINVAL; + } + + dev = __dev_get_by_index(net, bpm->ifindex); + 
if (!dev) { + NL_SET_ERR_MSG(extack, "Device doesn't exist"); + return -ENODEV; + } + + if (NL_REQ_ATTR_CHECK(extack, NULL, tb, MDBA_SET_ENTRY)) { + NL_SET_ERR_MSG(extack, "Missing MDBA_SET_ENTRY attribute"); + return -EINVAL; + } + + if (!dev->netdev_ops->ndo_mdb_del) { + NL_SET_ERR_MSG(extack, "Device does not support MDB operations"); + return -EOPNOTSUPP; + } + + return dev->netdev_ops->ndo_mdb_del(dev, tb, extack); +} + /* Process one rtnetlink message. */ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -6297,4 +6510,8 @@ void __init rtnetlink_init(void) rtnl_register(PF_UNSPEC, RTM_GETSTATS, rtnl_stats_get, rtnl_stats_dump, 0); rtnl_register(PF_UNSPEC, RTM_SETSTATS, rtnl_stats_set, NULL, 0); + + rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, rtnl_mdb_dump, 0); + rtnl_register(PF_BRIDGE, RTM_NEWMDB, rtnl_mdb_add, NULL, 0); + rtnl_register(PF_BRIDGE, RTM_DELMDB, rtnl_mdb_del, NULL, 0); } -- GitLab From da654c80a0ebba2e2a0614e017c9bbe57f643fe2 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 15 Mar 2023 15:11:48 +0200 Subject: [PATCH 0528/2078] rtnetlink: bridge: mcast: Relax group address validation in common code In the upcoming VXLAN MDB implementation, the 0.0.0.0 and :: MDB entries will act as catchall entries for unregistered IP multicast traffic in a similar fashion to the 00:00:00:00:00:00 VXLAN FDB entry that is used to transmit BUM traffic. In deployments where inter-subnet multicast forwarding is used, not all the VTEPs in a tenant domain are members in all the broadcast domains. It is therefore advantageous to transmit BULL (broadcast, unknown unicast and link-local multicast) and unregistered IP multicast traffic on different tunnels. If the same tunnel was used, a VTEP only interested in IP multicast traffic would also pull all the BULL traffic and drop it as it is not a member in the originating broadcast domain [1]. Prepare for this change by allowing the 0.0.0.0 group address in the common rtnetlink MDB code and forbid it in the bridge driver. A similar change is not needed for IPv6 because the common code only validates that the group address is not the all-nodes address. [1] https://datatracker.ietf.org/doc/html/draft-ietf-bess-evpn-irb-mcast#section-2.6 Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- net/bridge/br_mdb.c | 6 ++++++ net/core/rtnetlink.c | 5 +++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 76636c61db21f..7305f5f8215ca 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -1246,6 +1246,12 @@ static int br_mdb_config_init(struct br_mdb_config *cfg, struct net_device *dev, } } + if (cfg->entry->addr.proto == htons(ETH_P_IP) && + ipv4_is_zeronet(cfg->entry->addr.u.ip4)) { + NL_SET_ERR_MSG_MOD(extack, "IPv4 entry group address 0.0.0.0 is not allowed"); + return -EINVAL; + } + if (tb[MDBA_SET_ENTRY_ATTRS]) return br_mdb_config_attrs_init(tb[MDBA_SET_ENTRY_ATTRS], cfg, extack); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index f347d9fa78c7b..b7b1661d0d564 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -6152,8 +6152,9 @@ static int rtnl_validate_mdb_entry(const struct nlattr *attr, } if (entry->addr.proto == htons(ETH_P_IP)) { - if (!ipv4_is_multicast(entry->addr.u.ip4)) { - NL_SET_ERR_MSG(extack, "IPv4 entry group address is not multicast"); + if (!ipv4_is_multicast(entry->addr.u.ip4) && + !ipv4_is_zeronet(entry->addr.u.ip4)) { + NL_SET_ERR_MSG(extack, "IPv4 entry group address is not multicast or 0.0.0.0"); return -EINVAL; } if (ipv4_is_local_multicast(entry->addr.u.ip4)) { -- GitLab From f307c8bf37a346ed3e8b6090b64b4ca8d61e1bcd Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 15 Mar 2023 15:11:49 +0200 Subject: [PATCH 0529/2078] vxlan: Move address helpers to private headers Move the helpers out of the core C file to the private header so that they could be used by the upcoming MDB code. While at it, constify the second argument of vxlan_nla_get_addr(). Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- drivers/net/vxlan/vxlan_core.c | 47 ------------------------------- drivers/net/vxlan/vxlan_private.h | 45 +++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 47 deletions(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index f2c30214cae8e..2c65cc5dd55d7 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -71,53 +71,6 @@ static inline bool vxlan_collect_metadata(struct vxlan_sock *vs) ip_tunnel_collect_metadata(); } -#if IS_ENABLED(CONFIG_IPV6) -static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla) -{ - if (nla_len(nla) >= sizeof(struct in6_addr)) { - ip->sin6.sin6_addr = nla_get_in6_addr(nla); - ip->sa.sa_family = AF_INET6; - return 0; - } else if (nla_len(nla) >= sizeof(__be32)) { - ip->sin.sin_addr.s_addr = nla_get_in_addr(nla); - ip->sa.sa_family = AF_INET; - return 0; - } else { - return -EAFNOSUPPORT; - } -} - -static int vxlan_nla_put_addr(struct sk_buff *skb, int attr, - const union vxlan_addr *ip) -{ - if (ip->sa.sa_family == AF_INET6) - return nla_put_in6_addr(skb, attr, &ip->sin6.sin6_addr); - else - return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr); -} - -#else /* !CONFIG_IPV6 */ - -static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla) -{ - if (nla_len(nla) >= sizeof(struct in6_addr)) { - return -EAFNOSUPPORT; - } else if (nla_len(nla) >= sizeof(__be32)) { - ip->sin.sin_addr.s_addr = nla_get_in_addr(nla); - ip->sa.sa_family = AF_INET; - return 0; - } else { - return -EAFNOSUPPORT; - } -} - -static int vxlan_nla_put_addr(struct sk_buff *skb, int attr, - const union vxlan_addr *ip) -{ - return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr); -} -#endif - /* Find VXLAN socket based on network namespace, address family, UDP port, * enabled unshareable flags and socket device binding (see l3mdev with * non-default VRF). 
diff --git a/drivers/net/vxlan/vxlan_private.h b/drivers/net/vxlan/vxlan_private.h index 599c3b4fdd5ed..038528f9684af 100644 --- a/drivers/net/vxlan/vxlan_private.h +++ b/drivers/net/vxlan/vxlan_private.h @@ -85,6 +85,31 @@ bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b) return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr; } +static inline int vxlan_nla_get_addr(union vxlan_addr *ip, + const struct nlattr *nla) +{ + if (nla_len(nla) >= sizeof(struct in6_addr)) { + ip->sin6.sin6_addr = nla_get_in6_addr(nla); + ip->sa.sa_family = AF_INET6; + return 0; + } else if (nla_len(nla) >= sizeof(__be32)) { + ip->sin.sin_addr.s_addr = nla_get_in_addr(nla); + ip->sa.sa_family = AF_INET; + return 0; + } else { + return -EAFNOSUPPORT; + } +} + +static inline int vxlan_nla_put_addr(struct sk_buff *skb, int attr, + const union vxlan_addr *ip) +{ + if (ip->sa.sa_family == AF_INET6) + return nla_put_in6_addr(skb, attr, &ip->sin6.sin6_addr); + else + return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr); +} + #else /* !CONFIG_IPV6 */ static inline @@ -93,6 +118,26 @@ bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b) return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr; } +static inline int vxlan_nla_get_addr(union vxlan_addr *ip, + const struct nlattr *nla) +{ + if (nla_len(nla) >= sizeof(struct in6_addr)) { + return -EAFNOSUPPORT; + } else if (nla_len(nla) >= sizeof(__be32)) { + ip->sin.sin_addr.s_addr = nla_get_in_addr(nla); + ip->sa.sa_family = AF_INET; + return 0; + } else { + return -EAFNOSUPPORT; + } +} + +static inline int vxlan_nla_put_addr(struct sk_buff *skb, int attr, + const union vxlan_addr *ip) +{ + return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr); +} + #endif static inline struct vxlan_vni_node * -- GitLab From 6ab271aaad25351ea8587d67c6837678b875eb2c Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 15 Mar 2023 15:11:50 +0200 Subject: [PATCH 0530/2078] vxlan: Expose vxlan_xmit_one() Given a packet and a remote destination, the function will take care of encapsulating the packet and transmitting it to the destination. Expose it so that it could be used in subsequent patches by the MDB code to transmit a packet to the remote destination(s) stored in the MDB entry. It will allow us to keep the MDB code self-contained, not exposing its data structures to the rest of the VXLAN driver. Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- drivers/net/vxlan/vxlan_core.c | 5 ++--- drivers/net/vxlan/vxlan_private.h | 2 ++ 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 2c65cc5dd55d7..5de1a20497a64 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2395,9 +2395,8 @@ static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev, return 0; } -static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, - __be32 default_vni, struct vxlan_rdst *rdst, - bool did_rsc) +void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, + __be32 default_vni, struct vxlan_rdst *rdst, bool did_rsc) { struct dst_cache *dst_cache; struct ip_tunnel_info *info; diff --git a/drivers/net/vxlan/vxlan_private.h b/drivers/net/vxlan/vxlan_private.h index 038528f9684af..f4977925cb8a4 100644 --- a/drivers/net/vxlan/vxlan_private.h +++ b/drivers/net/vxlan/vxlan_private.h @@ -172,6 +172,8 @@ int vxlan_fdb_update(struct vxlan_dev *vxlan, __be16 port, __be32 src_vni, __be32 vni, __u32 ifindex, __u16 ndm_flags, u32 nhid, bool swdev_notify, struct netlink_ext_ack *extack); +void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, + __be32 default_vni, struct vxlan_rdst *rdst, bool did_rsc); int vxlan_vni_in_use(struct net *src_net, struct vxlan_dev *vxlan, struct vxlan_config *conf, __be32 vni); -- GitLab From a3a48de5eade770e911d35291217bdd69ce04ef1 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 15 Mar 2023 15:11:51 +0200 Subject: [PATCH 0531/2078] vxlan: mdb: Add MDB control path support Implement MDB control path support, enabling the creation, deletion, replacement and dumping of MDB entries in a similar fashion to the bridge driver. Unlike the bridge driver, each entry stores a list of remote VTEPs to which matched packets need to be replicated to and not a list of bridge ports. The motivating use case is the installation of MDB entries by a user space control plane in response to received EVPN routes. As such, only allow permanent MDB entries to be installed and do not implement snooping functionality, avoiding a lot of unnecessary complexity. Since entries can only be modified by user space under RTNL, use RTNL as the write lock. Use RCU to ensure that MDB entries and remotes are not freed while being accessed from the data path during transmission. In terms of uAPI, reuse the existing MDB netlink interface, but add a few new attributes to request and response messages: * IP address of the destination VXLAN tunnel endpoint where the multicast receivers reside. * UDP destination port number to use to connect to the remote VXLAN tunnel endpoint. * VXLAN VNI Network Identifier to use to connect to the remote VXLAN tunnel endpoint. Required when Ingress Replication (IR) is used and the remote VTEP is not a member of originating broadcast domain (VLAN/VNI) [1]. * Source VNI Network Identifier the MDB entry belongs to. Used only when the VXLAN device is in external mode. * Interface index of the outgoing interface to reach the remote VXLAN tunnel endpoint. This is required when the underlay destination IP is multicast (P2MP), as the multicast routing tables are not consulted. All the new attributes are added under the 'MDBA_SET_ENTRY_ATTRS' nest which is strictly validated by the bridge driver, thereby automatically rejecting the new attributes. 
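To make the resulting uAPI concrete, below is a minimal user-space sketch, given purely as an illustration and not part of this series: it assumes libmnl and uAPI headers from a kernel carrying this patch, the device name, group address, remote VTEP and VNI are made up, and error handling is omitted for brevity. It installs a permanent (*, 239.1.1.1) entry on vxlan0 pointing at remote VTEP 198.51.100.1, VNI 2000:

#include <arpa/inet.h>
#include <net/if.h>
#include <linux/if_bridge.h>
#include <linux/if_ether.h>
#include <linux/rtnetlink.h>
#include <libmnl/libmnl.h>

int main(void)
{
	char buf[MNL_SOCKET_BUFFER_SIZE];
	struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
	struct br_mdb_entry entry = {};
	struct br_port_msg *bpm;
	struct nlattr *nest;
	struct in_addr dst;

	nlh->nlmsg_type = RTM_NEWMDB;
	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_ACK;

	bpm = mnl_nlmsg_put_extra_header(nlh, sizeof(*bpm));
	bpm->family = AF_BRIDGE;
	bpm->ifindex = if_nametoindex("vxlan0");

	/* Port net device must be the VXLAN device itself */
	entry.ifindex = bpm->ifindex;
	entry.state = MDB_PERMANENT;
	entry.addr.proto = htons(ETH_P_IP);
	inet_pton(AF_INET, "239.1.1.1", &entry.addr.u.ip4);
	mnl_attr_put(nlh, MDBA_SET_ENTRY, sizeof(entry), &entry);

	/* Remote VTEP attributes live under the MDBA_SET_ENTRY_ATTRS nest */
	nest = mnl_attr_nest_start(nlh, MDBA_SET_ENTRY_ATTRS);
	inet_pton(AF_INET, "198.51.100.1", &dst);
	mnl_attr_put(nlh, MDBE_ATTR_DST, sizeof(dst), &dst);
	mnl_attr_put_u16(nlh, MDBE_ATTR_DST_PORT, 4789);
	mnl_attr_put_u32(nlh, MDBE_ATTR_VNI, 2000);
	mnl_attr_nest_end(nlh, nest);

	struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
	mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID);
	mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
	mnl_socket_close(nl);
	return 0;
}
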
[1] https://datatracker.ietf.org/doc/html/draft-ietf-bess-evpn-irb-mcast#section-3.2.2 Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- drivers/net/vxlan/Makefile | 2 +- drivers/net/vxlan/vxlan_core.c | 8 + drivers/net/vxlan/vxlan_mdb.c | 1341 +++++++++++++++++++++++++++++ drivers/net/vxlan/vxlan_private.h | 31 + include/net/vxlan.h | 5 + include/uapi/linux/if_bridge.h | 10 + 6 files changed, 1396 insertions(+), 1 deletion(-) create mode 100644 drivers/net/vxlan/vxlan_mdb.c diff --git a/drivers/net/vxlan/Makefile b/drivers/net/vxlan/Makefile index d4c255499b726..91b8fec8b6cf8 100644 --- a/drivers/net/vxlan/Makefile +++ b/drivers/net/vxlan/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_VXLAN) += vxlan.o -vxlan-objs := vxlan_core.o vxlan_multicast.o vxlan_vnifilter.o +vxlan-objs := vxlan_core.o vxlan_multicast.o vxlan_vnifilter.o vxlan_mdb.o diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 5de1a20497a64..a8b26d4f76de7 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2878,8 +2878,14 @@ static int vxlan_init(struct net_device *dev) if (err) goto err_free_percpu; + err = vxlan_mdb_init(vxlan); + if (err) + goto err_gro_cells_destroy; + return 0; +err_gro_cells_destroy: + gro_cells_destroy(&vxlan->gro_cells); err_free_percpu: free_percpu(dev->tstats); err_vnigroup_uninit: @@ -2904,6 +2910,8 @@ static void vxlan_uninit(struct net_device *dev) { struct vxlan_dev *vxlan = netdev_priv(dev); + vxlan_mdb_fini(vxlan); + if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) vxlan_vnigroup_uninit(vxlan); diff --git a/drivers/net/vxlan/vxlan_mdb.c b/drivers/net/vxlan/vxlan_mdb.c new file mode 100644 index 0000000000000..129692b3663ff --- /dev/null +++ b/drivers/net/vxlan/vxlan_mdb.c @@ -0,0 +1,1341 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vxlan_private.h" + +struct vxlan_mdb_entry_key { + union vxlan_addr src; + union vxlan_addr dst; + __be32 vni; +}; + +struct vxlan_mdb_entry { + struct rhash_head rhnode; + struct list_head remotes; + struct vxlan_mdb_entry_key key; + struct hlist_node mdb_node; + struct rcu_head rcu; +}; + +#define VXLAN_MDB_REMOTE_F_BLOCKED BIT(0) + +struct vxlan_mdb_remote { + struct list_head list; + struct vxlan_rdst __rcu *rd; + u8 flags; + u8 filter_mode; + u8 rt_protocol; + struct hlist_head src_list; + struct rcu_head rcu; +}; + +#define VXLAN_SGRP_F_DELETE BIT(0) + +struct vxlan_mdb_src_entry { + struct hlist_node node; + union vxlan_addr addr; + u8 flags; +}; + +struct vxlan_mdb_dump_ctx { + long reserved; + long entry_idx; + long remote_idx; +}; + +struct vxlan_mdb_config_src_entry { + union vxlan_addr addr; + struct list_head node; +}; + +struct vxlan_mdb_config { + struct vxlan_dev *vxlan; + struct vxlan_mdb_entry_key group; + struct list_head src_list; + union vxlan_addr remote_ip; + u32 remote_ifindex; + __be32 remote_vni; + __be16 remote_port; + u16 nlflags; + u8 flags; + u8 filter_mode; + u8 rt_protocol; +}; + +static const struct rhashtable_params vxlan_mdb_rht_params = { + .head_offset = offsetof(struct vxlan_mdb_entry, rhnode), + .key_offset = offsetof(struct vxlan_mdb_entry, key), + .key_len = sizeof(struct vxlan_mdb_entry_key), + .automatic_shrinking = true, +}; + +static int __vxlan_mdb_add(const struct vxlan_mdb_config *cfg, + struct netlink_ext_ack *extack); +static int __vxlan_mdb_del(const struct vxlan_mdb_config *cfg, + struct 
netlink_ext_ack *extack); + +static void vxlan_br_mdb_entry_fill(const struct vxlan_dev *vxlan, + const struct vxlan_mdb_entry *mdb_entry, + const struct vxlan_mdb_remote *remote, + struct br_mdb_entry *e) +{ + const union vxlan_addr *dst = &mdb_entry->key.dst; + + memset(e, 0, sizeof(*e)); + e->ifindex = vxlan->dev->ifindex; + e->state = MDB_PERMANENT; + + if (remote->flags & VXLAN_MDB_REMOTE_F_BLOCKED) + e->flags |= MDB_FLAGS_BLOCKED; + + switch (dst->sa.sa_family) { + case AF_INET: + e->addr.u.ip4 = dst->sin.sin_addr.s_addr; + e->addr.proto = htons(ETH_P_IP); + break; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + e->addr.u.ip6 = dst->sin6.sin6_addr; + e->addr.proto = htons(ETH_P_IPV6); + break; +#endif + } +} + +static int vxlan_mdb_entry_info_fill_srcs(struct sk_buff *skb, + const struct vxlan_mdb_remote *remote) +{ + struct vxlan_mdb_src_entry *ent; + struct nlattr *nest; + + if (hlist_empty(&remote->src_list)) + return 0; + + nest = nla_nest_start(skb, MDBA_MDB_EATTR_SRC_LIST); + if (!nest) + return -EMSGSIZE; + + hlist_for_each_entry(ent, &remote->src_list, node) { + struct nlattr *nest_ent; + + nest_ent = nla_nest_start(skb, MDBA_MDB_SRCLIST_ENTRY); + if (!nest_ent) + goto out_cancel_err; + + if (vxlan_nla_put_addr(skb, MDBA_MDB_SRCATTR_ADDRESS, + &ent->addr) || + nla_put_u32(skb, MDBA_MDB_SRCATTR_TIMER, 0)) + goto out_cancel_err; + + nla_nest_end(skb, nest_ent); + } + + nla_nest_end(skb, nest); + + return 0; + +out_cancel_err: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int vxlan_mdb_entry_info_fill(const struct vxlan_dev *vxlan, + struct sk_buff *skb, + const struct vxlan_mdb_entry *mdb_entry, + const struct vxlan_mdb_remote *remote) +{ + struct vxlan_rdst *rd = rtnl_dereference(remote->rd); + struct br_mdb_entry e; + struct nlattr *nest; + + nest = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY_INFO); + if (!nest) + return -EMSGSIZE; + + vxlan_br_mdb_entry_fill(vxlan, mdb_entry, remote, &e); + + if (nla_put_nohdr(skb, sizeof(e), &e) || + nla_put_u32(skb, MDBA_MDB_EATTR_TIMER, 0)) + goto nest_err; + + if (!vxlan_addr_any(&mdb_entry->key.src) && + vxlan_nla_put_addr(skb, MDBA_MDB_EATTR_SOURCE, &mdb_entry->key.src)) + goto nest_err; + + if (nla_put_u8(skb, MDBA_MDB_EATTR_RTPROT, remote->rt_protocol) || + nla_put_u8(skb, MDBA_MDB_EATTR_GROUP_MODE, remote->filter_mode) || + vxlan_mdb_entry_info_fill_srcs(skb, remote) || + vxlan_nla_put_addr(skb, MDBA_MDB_EATTR_DST, &rd->remote_ip)) + goto nest_err; + + if (rd->remote_port && rd->remote_port != vxlan->cfg.dst_port && + nla_put_u16(skb, MDBA_MDB_EATTR_DST_PORT, + be16_to_cpu(rd->remote_port))) + goto nest_err; + + if (rd->remote_vni != vxlan->default_dst.remote_vni && + nla_put_u32(skb, MDBA_MDB_EATTR_VNI, be32_to_cpu(rd->remote_vni))) + goto nest_err; + + if (rd->remote_ifindex && + nla_put_u32(skb, MDBA_MDB_EATTR_IFINDEX, rd->remote_ifindex)) + goto nest_err; + + if ((vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) && + mdb_entry->key.vni && nla_put_u32(skb, MDBA_MDB_EATTR_SRC_VNI, + be32_to_cpu(mdb_entry->key.vni))) + goto nest_err; + + nla_nest_end(skb, nest); + + return 0; + +nest_err: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int vxlan_mdb_entry_fill(const struct vxlan_dev *vxlan, + struct sk_buff *skb, + struct vxlan_mdb_dump_ctx *ctx, + const struct vxlan_mdb_entry *mdb_entry) +{ + int remote_idx = 0, s_remote_idx = ctx->remote_idx; + struct vxlan_mdb_remote *remote; + struct nlattr *nest; + int err = 0; + + nest = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY); + if (!nest) + return -EMSGSIZE; + + 
list_for_each_entry(remote, &mdb_entry->remotes, list) { + if (remote_idx < s_remote_idx) + goto skip; + + err = vxlan_mdb_entry_info_fill(vxlan, skb, mdb_entry, remote); + if (err) + break; +skip: + remote_idx++; + } + + ctx->remote_idx = err ? remote_idx : 0; + nla_nest_end(skb, nest); + return err; +} + +static int vxlan_mdb_fill(const struct vxlan_dev *vxlan, struct sk_buff *skb, + struct vxlan_mdb_dump_ctx *ctx) +{ + int entry_idx = 0, s_entry_idx = ctx->entry_idx; + struct vxlan_mdb_entry *mdb_entry; + struct nlattr *nest; + int err = 0; + + nest = nla_nest_start_noflag(skb, MDBA_MDB); + if (!nest) + return -EMSGSIZE; + + hlist_for_each_entry(mdb_entry, &vxlan->mdb_list, mdb_node) { + if (entry_idx < s_entry_idx) + goto skip; + + err = vxlan_mdb_entry_fill(vxlan, skb, ctx, mdb_entry); + if (err) + break; +skip: + entry_idx++; + } + + ctx->entry_idx = err ? entry_idx : 0; + nla_nest_end(skb, nest); + return err; +} + +int vxlan_mdb_dump(struct net_device *dev, struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct vxlan_mdb_dump_ctx *ctx = (void *)cb->ctx; + struct vxlan_dev *vxlan = netdev_priv(dev); + struct br_port_msg *bpm; + struct nlmsghdr *nlh; + int err; + + ASSERT_RTNL(); + + NL_ASSERT_DUMP_CTX_FITS(struct vxlan_mdb_dump_ctx); + + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, RTM_NEWMDB, sizeof(*bpm), + NLM_F_MULTI); + if (!nlh) + return -EMSGSIZE; + + bpm = nlmsg_data(nlh); + memset(bpm, 0, sizeof(*bpm)); + bpm->family = AF_BRIDGE; + bpm->ifindex = dev->ifindex; + + err = vxlan_mdb_fill(vxlan, skb, ctx); + + nlmsg_end(skb, nlh); + + cb->seq = vxlan->mdb_seq; + nl_dump_check_consistent(cb, nlh); + + return err; +} + +static const struct nla_policy +vxlan_mdbe_src_list_entry_pol[MDBE_SRCATTR_MAX + 1] = { + [MDBE_SRCATTR_ADDRESS] = NLA_POLICY_RANGE(NLA_BINARY, + sizeof(struct in_addr), + sizeof(struct in6_addr)), +}; + +static const struct nla_policy +vxlan_mdbe_src_list_pol[MDBE_SRC_LIST_MAX + 1] = { + [MDBE_SRC_LIST_ENTRY] = NLA_POLICY_NESTED(vxlan_mdbe_src_list_entry_pol), +}; + +static struct netlink_range_validation vni_range = { + .max = VXLAN_N_VID - 1, +}; + +static const struct nla_policy vxlan_mdbe_attrs_pol[MDBE_ATTR_MAX + 1] = { + [MDBE_ATTR_SOURCE] = NLA_POLICY_RANGE(NLA_BINARY, + sizeof(struct in_addr), + sizeof(struct in6_addr)), + [MDBE_ATTR_GROUP_MODE] = NLA_POLICY_RANGE(NLA_U8, MCAST_EXCLUDE, + MCAST_INCLUDE), + [MDBE_ATTR_SRC_LIST] = NLA_POLICY_NESTED(vxlan_mdbe_src_list_pol), + [MDBE_ATTR_RTPROT] = NLA_POLICY_MIN(NLA_U8, RTPROT_STATIC), + [MDBE_ATTR_DST] = NLA_POLICY_RANGE(NLA_BINARY, + sizeof(struct in_addr), + sizeof(struct in6_addr)), + [MDBE_ATTR_DST_PORT] = { .type = NLA_U16 }, + [MDBE_ATTR_VNI] = NLA_POLICY_FULL_RANGE(NLA_U32, &vni_range), + [MDBE_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1), + [MDBE_ATTR_SRC_VNI] = NLA_POLICY_FULL_RANGE(NLA_U32, &vni_range), +}; + +static bool vxlan_mdb_is_valid_source(const struct nlattr *attr, __be16 proto, + struct netlink_ext_ack *extack) +{ + switch (proto) { + case htons(ETH_P_IP): + if (nla_len(attr) != sizeof(struct in_addr)) { + NL_SET_ERR_MSG_MOD(extack, "IPv4 invalid source address length"); + return false; + } + if (ipv4_is_multicast(nla_get_in_addr(attr))) { + NL_SET_ERR_MSG_MOD(extack, "IPv4 multicast source address is not allowed"); + return false; + } + break; +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): { + struct in6_addr src; + + if (nla_len(attr) != sizeof(struct in6_addr)) { + NL_SET_ERR_MSG_MOD(extack, "IPv6 invalid source address length"); + return 
false; + } + src = nla_get_in6_addr(attr); + if (ipv6_addr_is_multicast(&src)) { + NL_SET_ERR_MSG_MOD(extack, "IPv6 multicast source address is not allowed"); + return false; + } + break; + } +#endif + default: + NL_SET_ERR_MSG_MOD(extack, "Invalid protocol used with source address"); + return false; + } + + return true; +} + +static void vxlan_mdb_config_group_set(struct vxlan_mdb_config *cfg, + const struct br_mdb_entry *entry, + const struct nlattr *source_attr) +{ + struct vxlan_mdb_entry_key *group = &cfg->group; + + switch (entry->addr.proto) { + case htons(ETH_P_IP): + group->dst.sa.sa_family = AF_INET; + group->dst.sin.sin_addr.s_addr = entry->addr.u.ip4; + break; +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + group->dst.sa.sa_family = AF_INET6; + group->dst.sin6.sin6_addr = entry->addr.u.ip6; + break; +#endif + } + + if (source_attr) + vxlan_nla_get_addr(&group->src, source_attr); +} + +static bool vxlan_mdb_is_star_g(const struct vxlan_mdb_entry_key *group) +{ + return !vxlan_addr_any(&group->dst) && vxlan_addr_any(&group->src); +} + +static bool vxlan_mdb_is_sg(const struct vxlan_mdb_entry_key *group) +{ + return !vxlan_addr_any(&group->dst) && !vxlan_addr_any(&group->src); +} + +static int vxlan_mdb_config_src_entry_init(struct vxlan_mdb_config *cfg, + __be16 proto, + const struct nlattr *src_entry, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[MDBE_SRCATTR_MAX + 1]; + struct vxlan_mdb_config_src_entry *src; + int err; + + err = nla_parse_nested(tb, MDBE_SRCATTR_MAX, src_entry, + vxlan_mdbe_src_list_entry_pol, extack); + if (err) + return err; + + if (NL_REQ_ATTR_CHECK(extack, src_entry, tb, MDBE_SRCATTR_ADDRESS)) + return -EINVAL; + + if (!vxlan_mdb_is_valid_source(tb[MDBE_SRCATTR_ADDRESS], proto, + extack)) + return -EINVAL; + + src = kzalloc(sizeof(*src), GFP_KERNEL); + if (!src) + return -ENOMEM; + + err = vxlan_nla_get_addr(&src->addr, tb[MDBE_SRCATTR_ADDRESS]); + if (err) + goto err_free_src; + + list_add_tail(&src->node, &cfg->src_list); + + return 0; + +err_free_src: + kfree(src); + return err; +} + +static void +vxlan_mdb_config_src_entry_fini(struct vxlan_mdb_config_src_entry *src) +{ + list_del(&src->node); + kfree(src); +} + +static int vxlan_mdb_config_src_list_init(struct vxlan_mdb_config *cfg, + __be16 proto, + const struct nlattr *src_list, + struct netlink_ext_ack *extack) +{ + struct vxlan_mdb_config_src_entry *src, *tmp; + struct nlattr *src_entry; + int rem, err; + + nla_for_each_nested(src_entry, src_list, rem) { + err = vxlan_mdb_config_src_entry_init(cfg, proto, src_entry, + extack); + if (err) + goto err_src_entry_init; + } + + return 0; + +err_src_entry_init: + list_for_each_entry_safe_reverse(src, tmp, &cfg->src_list, node) + vxlan_mdb_config_src_entry_fini(src); + return err; +} + +static void vxlan_mdb_config_src_list_fini(struct vxlan_mdb_config *cfg) +{ + struct vxlan_mdb_config_src_entry *src, *tmp; + + list_for_each_entry_safe_reverse(src, tmp, &cfg->src_list, node) + vxlan_mdb_config_src_entry_fini(src); +} + +static int vxlan_mdb_config_attrs_init(struct vxlan_mdb_config *cfg, + const struct br_mdb_entry *entry, + const struct nlattr *set_attrs, + struct netlink_ext_ack *extack) +{ + struct nlattr *mdbe_attrs[MDBE_ATTR_MAX + 1]; + int err; + + err = nla_parse_nested(mdbe_attrs, MDBE_ATTR_MAX, set_attrs, + vxlan_mdbe_attrs_pol, extack); + if (err) + return err; + + if (NL_REQ_ATTR_CHECK(extack, set_attrs, mdbe_attrs, MDBE_ATTR_DST)) { + NL_SET_ERR_MSG_MOD(extack, "Missing remote destination IP address"); + return -EINVAL; + 
} + + if (mdbe_attrs[MDBE_ATTR_SOURCE] && + !vxlan_mdb_is_valid_source(mdbe_attrs[MDBE_ATTR_SOURCE], + entry->addr.proto, extack)) + return -EINVAL; + + vxlan_mdb_config_group_set(cfg, entry, mdbe_attrs[MDBE_ATTR_SOURCE]); + + /* rtnetlink code only validates that IPv4 group address is + * multicast. + */ + if (!vxlan_addr_is_multicast(&cfg->group.dst) && + !vxlan_addr_any(&cfg->group.dst)) { + NL_SET_ERR_MSG_MOD(extack, "Group address is not multicast"); + return -EINVAL; + } + + if (vxlan_addr_any(&cfg->group.dst) && + mdbe_attrs[MDBE_ATTR_SOURCE]) { + NL_SET_ERR_MSG_MOD(extack, "Source cannot be specified for the all-zeros entry"); + return -EINVAL; + } + + if (vxlan_mdb_is_sg(&cfg->group)) + cfg->filter_mode = MCAST_INCLUDE; + + if (mdbe_attrs[MDBE_ATTR_GROUP_MODE]) { + if (!vxlan_mdb_is_star_g(&cfg->group)) { + NL_SET_ERR_MSG_MOD(extack, "Filter mode can only be set for (*, G) entries"); + return -EINVAL; + } + cfg->filter_mode = nla_get_u8(mdbe_attrs[MDBE_ATTR_GROUP_MODE]); + } + + if (mdbe_attrs[MDBE_ATTR_SRC_LIST]) { + if (!vxlan_mdb_is_star_g(&cfg->group)) { + NL_SET_ERR_MSG_MOD(extack, "Source list can only be set for (*, G) entries"); + return -EINVAL; + } + if (!mdbe_attrs[MDBE_ATTR_GROUP_MODE]) { + NL_SET_ERR_MSG_MOD(extack, "Source list cannot be set without filter mode"); + return -EINVAL; + } + err = vxlan_mdb_config_src_list_init(cfg, entry->addr.proto, + mdbe_attrs[MDBE_ATTR_SRC_LIST], + extack); + if (err) + return err; + } + + if (vxlan_mdb_is_star_g(&cfg->group) && list_empty(&cfg->src_list) && + cfg->filter_mode == MCAST_INCLUDE) { + NL_SET_ERR_MSG_MOD(extack, "Cannot add (*, G) INCLUDE with an empty source list"); + return -EINVAL; + } + + if (mdbe_attrs[MDBE_ATTR_RTPROT]) + cfg->rt_protocol = nla_get_u8(mdbe_attrs[MDBE_ATTR_RTPROT]); + + err = vxlan_nla_get_addr(&cfg->remote_ip, mdbe_attrs[MDBE_ATTR_DST]); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Invalid remote destination address"); + goto err_src_list_fini; + } + + if (mdbe_attrs[MDBE_ATTR_DST_PORT]) + cfg->remote_port = + cpu_to_be16(nla_get_u16(mdbe_attrs[MDBE_ATTR_DST_PORT])); + + if (mdbe_attrs[MDBE_ATTR_VNI]) + cfg->remote_vni = + cpu_to_be32(nla_get_u32(mdbe_attrs[MDBE_ATTR_VNI])); + + if (mdbe_attrs[MDBE_ATTR_IFINDEX]) { + cfg->remote_ifindex = + nla_get_s32(mdbe_attrs[MDBE_ATTR_IFINDEX]); + if (!__dev_get_by_index(cfg->vxlan->net, cfg->remote_ifindex)) { + NL_SET_ERR_MSG_MOD(extack, "Outgoing interface not found"); + err = -EINVAL; + goto err_src_list_fini; + } + } + + if (mdbe_attrs[MDBE_ATTR_SRC_VNI]) + cfg->group.vni = + cpu_to_be32(nla_get_u32(mdbe_attrs[MDBE_ATTR_SRC_VNI])); + + return 0; + +err_src_list_fini: + vxlan_mdb_config_src_list_fini(cfg); + return err; +} + +static int vxlan_mdb_config_init(struct vxlan_mdb_config *cfg, + struct net_device *dev, struct nlattr *tb[], + u16 nlmsg_flags, + struct netlink_ext_ack *extack) +{ + struct br_mdb_entry *entry = nla_data(tb[MDBA_SET_ENTRY]); + struct vxlan_dev *vxlan = netdev_priv(dev); + + memset(cfg, 0, sizeof(*cfg)); + cfg->vxlan = vxlan; + cfg->group.vni = vxlan->default_dst.remote_vni; + INIT_LIST_HEAD(&cfg->src_list); + cfg->nlflags = nlmsg_flags; + cfg->filter_mode = MCAST_EXCLUDE; + cfg->rt_protocol = RTPROT_STATIC; + cfg->remote_vni = vxlan->default_dst.remote_vni; + cfg->remote_port = vxlan->cfg.dst_port; + + if (entry->ifindex != dev->ifindex) { + NL_SET_ERR_MSG_MOD(extack, "Port net device must be the VXLAN net device"); + return -EINVAL; + } + + /* State is not part of the entry key and can be ignored on deletion + * requests. 
+ */ + if ((nlmsg_flags & (NLM_F_CREATE | NLM_F_REPLACE)) && + entry->state != MDB_PERMANENT) { + NL_SET_ERR_MSG_MOD(extack, "MDB entry must be permanent"); + return -EINVAL; + } + + if (entry->flags) { + NL_SET_ERR_MSG_MOD(extack, "Invalid MDB entry flags"); + return -EINVAL; + } + + if (entry->vid) { + NL_SET_ERR_MSG_MOD(extack, "VID must not be specified"); + return -EINVAL; + } + + if (entry->addr.proto != htons(ETH_P_IP) && + entry->addr.proto != htons(ETH_P_IPV6)) { + NL_SET_ERR_MSG_MOD(extack, "Group address must be an IPv4 / IPv6 address"); + return -EINVAL; + } + + if (NL_REQ_ATTR_CHECK(extack, NULL, tb, MDBA_SET_ENTRY_ATTRS)) { + NL_SET_ERR_MSG_MOD(extack, "Missing MDBA_SET_ENTRY_ATTRS attribute"); + return -EINVAL; + } + + return vxlan_mdb_config_attrs_init(cfg, entry, tb[MDBA_SET_ENTRY_ATTRS], + extack); +} + +static void vxlan_mdb_config_fini(struct vxlan_mdb_config *cfg) +{ + vxlan_mdb_config_src_list_fini(cfg); +} + +static struct vxlan_mdb_entry * +vxlan_mdb_entry_lookup(struct vxlan_dev *vxlan, + const struct vxlan_mdb_entry_key *group) +{ + return rhashtable_lookup_fast(&vxlan->mdb_tbl, group, + vxlan_mdb_rht_params); +} + +static struct vxlan_mdb_remote * +vxlan_mdb_remote_lookup(const struct vxlan_mdb_entry *mdb_entry, + const union vxlan_addr *addr) +{ + struct vxlan_mdb_remote *remote; + + list_for_each_entry(remote, &mdb_entry->remotes, list) { + struct vxlan_rdst *rd = rtnl_dereference(remote->rd); + + if (vxlan_addr_equal(addr, &rd->remote_ip)) + return remote; + } + + return NULL; +} + +static void vxlan_mdb_rdst_free(struct rcu_head *head) +{ + struct vxlan_rdst *rd = container_of(head, struct vxlan_rdst, rcu); + + dst_cache_destroy(&rd->dst_cache); + kfree(rd); +} + +static int vxlan_mdb_remote_rdst_init(const struct vxlan_mdb_config *cfg, + struct vxlan_mdb_remote *remote) +{ + struct vxlan_rdst *rd; + int err; + + rd = kzalloc(sizeof(*rd), GFP_KERNEL); + if (!rd) + return -ENOMEM; + + err = dst_cache_init(&rd->dst_cache, GFP_KERNEL); + if (err) + goto err_free_rdst; + + rd->remote_ip = cfg->remote_ip; + rd->remote_port = cfg->remote_port; + rd->remote_vni = cfg->remote_vni; + rd->remote_ifindex = cfg->remote_ifindex; + rcu_assign_pointer(remote->rd, rd); + + return 0; + +err_free_rdst: + kfree(rd); + return err; +} + +static void vxlan_mdb_remote_rdst_fini(struct vxlan_rdst *rd) +{ + call_rcu(&rd->rcu, vxlan_mdb_rdst_free); +} + +static int vxlan_mdb_remote_init(const struct vxlan_mdb_config *cfg, + struct vxlan_mdb_remote *remote) +{ + int err; + + err = vxlan_mdb_remote_rdst_init(cfg, remote); + if (err) + return err; + + remote->flags = cfg->flags; + remote->filter_mode = cfg->filter_mode; + remote->rt_protocol = cfg->rt_protocol; + INIT_HLIST_HEAD(&remote->src_list); + + return 0; +} + +static void vxlan_mdb_remote_fini(struct vxlan_dev *vxlan, + struct vxlan_mdb_remote *remote) +{ + WARN_ON_ONCE(!hlist_empty(&remote->src_list)); + vxlan_mdb_remote_rdst_fini(rtnl_dereference(remote->rd)); +} + +static struct vxlan_mdb_src_entry * +vxlan_mdb_remote_src_entry_lookup(const struct vxlan_mdb_remote *remote, + const union vxlan_addr *addr) +{ + struct vxlan_mdb_src_entry *ent; + + hlist_for_each_entry(ent, &remote->src_list, node) { + if (vxlan_addr_equal(&ent->addr, addr)) + return ent; + } + + return NULL; +} + +static struct vxlan_mdb_src_entry * +vxlan_mdb_remote_src_entry_add(struct vxlan_mdb_remote *remote, + const union vxlan_addr *addr) +{ + struct vxlan_mdb_src_entry *ent; + + ent = kzalloc(sizeof(*ent), GFP_KERNEL); + if (!ent) + return NULL; + + 
ent->addr = *addr; + hlist_add_head(&ent->node, &remote->src_list); + + return ent; +} + +static void +vxlan_mdb_remote_src_entry_del(struct vxlan_mdb_src_entry *ent) +{ + hlist_del(&ent->node); + kfree(ent); +} + +static int +vxlan_mdb_remote_src_fwd_add(const struct vxlan_mdb_config *cfg, + const union vxlan_addr *addr, + struct netlink_ext_ack *extack) +{ + struct vxlan_mdb_config sg_cfg; + + memset(&sg_cfg, 0, sizeof(sg_cfg)); + sg_cfg.vxlan = cfg->vxlan; + sg_cfg.group.src = *addr; + sg_cfg.group.dst = cfg->group.dst; + sg_cfg.group.vni = cfg->group.vni; + INIT_LIST_HEAD(&sg_cfg.src_list); + sg_cfg.remote_ip = cfg->remote_ip; + sg_cfg.remote_ifindex = cfg->remote_ifindex; + sg_cfg.remote_vni = cfg->remote_vni; + sg_cfg.remote_port = cfg->remote_port; + sg_cfg.nlflags = cfg->nlflags; + sg_cfg.filter_mode = MCAST_INCLUDE; + if (cfg->filter_mode == MCAST_EXCLUDE) + sg_cfg.flags = VXLAN_MDB_REMOTE_F_BLOCKED; + sg_cfg.rt_protocol = cfg->rt_protocol; + + return __vxlan_mdb_add(&sg_cfg, extack); +} + +static void +vxlan_mdb_remote_src_fwd_del(struct vxlan_dev *vxlan, + const struct vxlan_mdb_entry_key *group, + const struct vxlan_mdb_remote *remote, + const union vxlan_addr *addr) +{ + struct vxlan_rdst *rd = rtnl_dereference(remote->rd); + struct vxlan_mdb_config sg_cfg; + + memset(&sg_cfg, 0, sizeof(sg_cfg)); + sg_cfg.vxlan = vxlan; + sg_cfg.group.src = *addr; + sg_cfg.group.dst = group->dst; + sg_cfg.group.vni = group->vni; + INIT_LIST_HEAD(&sg_cfg.src_list); + sg_cfg.remote_ip = rd->remote_ip; + + __vxlan_mdb_del(&sg_cfg, NULL); +} + +static int +vxlan_mdb_remote_src_add(const struct vxlan_mdb_config *cfg, + struct vxlan_mdb_remote *remote, + const struct vxlan_mdb_config_src_entry *src, + struct netlink_ext_ack *extack) +{ + struct vxlan_mdb_src_entry *ent; + int err; + + ent = vxlan_mdb_remote_src_entry_lookup(remote, &src->addr); + if (!ent) { + ent = vxlan_mdb_remote_src_entry_add(remote, &src->addr); + if (!ent) + return -ENOMEM; + } else if (!(cfg->nlflags & NLM_F_REPLACE)) { + NL_SET_ERR_MSG_MOD(extack, "Source entry already exists"); + return -EEXIST; + } + + err = vxlan_mdb_remote_src_fwd_add(cfg, &ent->addr, extack); + if (err) + goto err_src_del; + + /* Clear flags in case source entry was marked for deletion as part of + * replace flow. 
+ */ + ent->flags = 0; + + return 0; + +err_src_del: + vxlan_mdb_remote_src_entry_del(ent); + return err; +} + +static void vxlan_mdb_remote_src_del(struct vxlan_dev *vxlan, + const struct vxlan_mdb_entry_key *group, + const struct vxlan_mdb_remote *remote, + struct vxlan_mdb_src_entry *ent) +{ + vxlan_mdb_remote_src_fwd_del(vxlan, group, remote, &ent->addr); + vxlan_mdb_remote_src_entry_del(ent); +} + +static int vxlan_mdb_remote_srcs_add(const struct vxlan_mdb_config *cfg, + struct vxlan_mdb_remote *remote, + struct netlink_ext_ack *extack) +{ + struct vxlan_mdb_config_src_entry *src; + struct vxlan_mdb_src_entry *ent; + struct hlist_node *tmp; + int err; + + list_for_each_entry(src, &cfg->src_list, node) { + err = vxlan_mdb_remote_src_add(cfg, remote, src, extack); + if (err) + goto err_src_del; + } + + return 0; + +err_src_del: + hlist_for_each_entry_safe(ent, tmp, &remote->src_list, node) + vxlan_mdb_remote_src_del(cfg->vxlan, &cfg->group, remote, ent); + return err; +} + +static void vxlan_mdb_remote_srcs_del(struct vxlan_dev *vxlan, + const struct vxlan_mdb_entry_key *group, + struct vxlan_mdb_remote *remote) +{ + struct vxlan_mdb_src_entry *ent; + struct hlist_node *tmp; + + hlist_for_each_entry_safe(ent, tmp, &remote->src_list, node) + vxlan_mdb_remote_src_del(vxlan, group, remote, ent); +} + +static size_t +vxlan_mdb_nlmsg_src_list_size(const struct vxlan_mdb_entry_key *group, + const struct vxlan_mdb_remote *remote) +{ + struct vxlan_mdb_src_entry *ent; + size_t nlmsg_size; + + if (hlist_empty(&remote->src_list)) + return 0; + + /* MDBA_MDB_EATTR_SRC_LIST */ + nlmsg_size = nla_total_size(0); + + hlist_for_each_entry(ent, &remote->src_list, node) { + /* MDBA_MDB_SRCLIST_ENTRY */ + nlmsg_size += nla_total_size(0) + + /* MDBA_MDB_SRCATTR_ADDRESS */ + nla_total_size(vxlan_addr_size(&group->dst)) + + /* MDBA_MDB_SRCATTR_TIMER */ + nla_total_size(sizeof(u8)); + } + + return nlmsg_size; +} + +static size_t vxlan_mdb_nlmsg_size(const struct vxlan_dev *vxlan, + const struct vxlan_mdb_entry *mdb_entry, + const struct vxlan_mdb_remote *remote) +{ + const struct vxlan_mdb_entry_key *group = &mdb_entry->key; + struct vxlan_rdst *rd = rtnl_dereference(remote->rd); + size_t nlmsg_size; + + nlmsg_size = NLMSG_ALIGN(sizeof(struct br_port_msg)) + + /* MDBA_MDB */ + nla_total_size(0) + + /* MDBA_MDB_ENTRY */ + nla_total_size(0) + + /* MDBA_MDB_ENTRY_INFO */ + nla_total_size(sizeof(struct br_mdb_entry)) + + /* MDBA_MDB_EATTR_TIMER */ + nla_total_size(sizeof(u32)); + /* MDBA_MDB_EATTR_SOURCE */ + if (vxlan_mdb_is_sg(group)) + nlmsg_size += nla_total_size(vxlan_addr_size(&group->dst)); + /* MDBA_MDB_EATTR_RTPROT */ + nlmsg_size += nla_total_size(sizeof(u8)); + /* MDBA_MDB_EATTR_SRC_LIST */ + nlmsg_size += vxlan_mdb_nlmsg_src_list_size(group, remote); + /* MDBA_MDB_EATTR_GROUP_MODE */ + nlmsg_size += nla_total_size(sizeof(u8)); + /* MDBA_MDB_EATTR_DST */ + nlmsg_size += nla_total_size(vxlan_addr_size(&rd->remote_ip)); + /* MDBA_MDB_EATTR_DST_PORT */ + if (rd->remote_port && rd->remote_port != vxlan->cfg.dst_port) + nlmsg_size += nla_total_size(sizeof(u16)); + /* MDBA_MDB_EATTR_VNI */ + if (rd->remote_vni != vxlan->default_dst.remote_vni) + nlmsg_size += nla_total_size(sizeof(u32)); + /* MDBA_MDB_EATTR_IFINDEX */ + if (rd->remote_ifindex) + nlmsg_size += nla_total_size(sizeof(u32)); + /* MDBA_MDB_EATTR_SRC_VNI */ + if ((vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) && group->vni) + nlmsg_size += nla_total_size(sizeof(u32)); + + return nlmsg_size; +} + +static int vxlan_mdb_nlmsg_fill(const struct 
vxlan_dev *vxlan, + struct sk_buff *skb, + const struct vxlan_mdb_entry *mdb_entry, + const struct vxlan_mdb_remote *remote, + int type) +{ + struct nlattr *mdb_nest, *mdb_entry_nest; + struct br_port_msg *bpm; + struct nlmsghdr *nlh; + + nlh = nlmsg_put(skb, 0, 0, type, sizeof(*bpm), 0); + if (!nlh) + return -EMSGSIZE; + + bpm = nlmsg_data(nlh); + memset(bpm, 0, sizeof(*bpm)); + bpm->family = AF_BRIDGE; + bpm->ifindex = vxlan->dev->ifindex; + + mdb_nest = nla_nest_start_noflag(skb, MDBA_MDB); + if (!mdb_nest) + goto cancel; + mdb_entry_nest = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY); + if (!mdb_entry_nest) + goto cancel; + + if (vxlan_mdb_entry_info_fill(vxlan, skb, mdb_entry, remote)) + goto cancel; + + nla_nest_end(skb, mdb_entry_nest); + nla_nest_end(skb, mdb_nest); + nlmsg_end(skb, nlh); + + return 0; + +cancel: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static void vxlan_mdb_remote_notify(const struct vxlan_dev *vxlan, + const struct vxlan_mdb_entry *mdb_entry, + const struct vxlan_mdb_remote *remote, + int type) +{ + struct net *net = dev_net(vxlan->dev); + struct sk_buff *skb; + int err = -ENOBUFS; + + skb = nlmsg_new(vxlan_mdb_nlmsg_size(vxlan, mdb_entry, remote), + GFP_KERNEL); + if (!skb) + goto errout; + + err = vxlan_mdb_nlmsg_fill(vxlan, skb, mdb_entry, remote, type); + if (err) { + kfree_skb(skb); + goto errout; + } + + rtnl_notify(skb, net, 0, RTNLGRP_MDB, NULL, GFP_KERNEL); + return; +errout: + rtnl_set_sk_err(net, RTNLGRP_MDB, err); +} + +static int +vxlan_mdb_remote_srcs_replace(const struct vxlan_mdb_config *cfg, + const struct vxlan_mdb_entry *mdb_entry, + struct vxlan_mdb_remote *remote, + struct netlink_ext_ack *extack) +{ + struct vxlan_dev *vxlan = cfg->vxlan; + struct vxlan_mdb_src_entry *ent; + struct hlist_node *tmp; + int err; + + hlist_for_each_entry(ent, &remote->src_list, node) + ent->flags |= VXLAN_SGRP_F_DELETE; + + err = vxlan_mdb_remote_srcs_add(cfg, remote, extack); + if (err) + goto err_clear_delete; + + hlist_for_each_entry_safe(ent, tmp, &remote->src_list, node) { + if (ent->flags & VXLAN_SGRP_F_DELETE) + vxlan_mdb_remote_src_del(vxlan, &mdb_entry->key, remote, + ent); + } + + return 0; + +err_clear_delete: + hlist_for_each_entry(ent, &remote->src_list, node) + ent->flags &= ~VXLAN_SGRP_F_DELETE; + return err; +} + +static int vxlan_mdb_remote_replace(const struct vxlan_mdb_config *cfg, + const struct vxlan_mdb_entry *mdb_entry, + struct vxlan_mdb_remote *remote, + struct netlink_ext_ack *extack) +{ + struct vxlan_rdst *new_rd, *old_rd = rtnl_dereference(remote->rd); + struct vxlan_dev *vxlan = cfg->vxlan; + int err; + + err = vxlan_mdb_remote_rdst_init(cfg, remote); + if (err) + return err; + new_rd = rtnl_dereference(remote->rd); + + err = vxlan_mdb_remote_srcs_replace(cfg, mdb_entry, remote, extack); + if (err) + goto err_rdst_reset; + + WRITE_ONCE(remote->flags, cfg->flags); + WRITE_ONCE(remote->filter_mode, cfg->filter_mode); + remote->rt_protocol = cfg->rt_protocol; + vxlan_mdb_remote_notify(vxlan, mdb_entry, remote, RTM_NEWMDB); + + vxlan_mdb_remote_rdst_fini(old_rd); + + return 0; + +err_rdst_reset: + rcu_assign_pointer(remote->rd, old_rd); + vxlan_mdb_remote_rdst_fini(new_rd); + return err; +} + +static int vxlan_mdb_remote_add(const struct vxlan_mdb_config *cfg, + struct vxlan_mdb_entry *mdb_entry, + struct netlink_ext_ack *extack) +{ + struct vxlan_mdb_remote *remote; + int err; + + remote = vxlan_mdb_remote_lookup(mdb_entry, &cfg->remote_ip); + if (remote) { + if (!(cfg->nlflags & NLM_F_REPLACE)) { + NL_SET_ERR_MSG_MOD(extack, 
"Replace not specified and MDB remote entry already exists"); + return -EEXIST; + } + return vxlan_mdb_remote_replace(cfg, mdb_entry, remote, extack); + } + + if (!(cfg->nlflags & NLM_F_CREATE)) { + NL_SET_ERR_MSG_MOD(extack, "Create not specified and entry does not exist"); + return -ENOENT; + } + + remote = kzalloc(sizeof(*remote), GFP_KERNEL); + if (!remote) + return -ENOMEM; + + err = vxlan_mdb_remote_init(cfg, remote); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to initialize remote MDB entry"); + goto err_free_remote; + } + + err = vxlan_mdb_remote_srcs_add(cfg, remote, extack); + if (err) + goto err_remote_fini; + + list_add_rcu(&remote->list, &mdb_entry->remotes); + vxlan_mdb_remote_notify(cfg->vxlan, mdb_entry, remote, RTM_NEWMDB); + + return 0; + +err_remote_fini: + vxlan_mdb_remote_fini(cfg->vxlan, remote); +err_free_remote: + kfree(remote); + return err; +} + +static void vxlan_mdb_remote_del(struct vxlan_dev *vxlan, + struct vxlan_mdb_entry *mdb_entry, + struct vxlan_mdb_remote *remote) +{ + vxlan_mdb_remote_notify(vxlan, mdb_entry, remote, RTM_DELMDB); + list_del_rcu(&remote->list); + vxlan_mdb_remote_srcs_del(vxlan, &mdb_entry->key, remote); + vxlan_mdb_remote_fini(vxlan, remote); + kfree_rcu(remote, rcu); +} + +static struct vxlan_mdb_entry * +vxlan_mdb_entry_get(struct vxlan_dev *vxlan, + const struct vxlan_mdb_entry_key *group) +{ + struct vxlan_mdb_entry *mdb_entry; + int err; + + mdb_entry = vxlan_mdb_entry_lookup(vxlan, group); + if (mdb_entry) + return mdb_entry; + + mdb_entry = kzalloc(sizeof(*mdb_entry), GFP_KERNEL); + if (!mdb_entry) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&mdb_entry->remotes); + memcpy(&mdb_entry->key, group, sizeof(mdb_entry->key)); + hlist_add_head(&mdb_entry->mdb_node, &vxlan->mdb_list); + + err = rhashtable_lookup_insert_fast(&vxlan->mdb_tbl, + &mdb_entry->rhnode, + vxlan_mdb_rht_params); + if (err) + goto err_free_entry; + + return mdb_entry; + +err_free_entry: + hlist_del(&mdb_entry->mdb_node); + kfree(mdb_entry); + return ERR_PTR(err); +} + +static void vxlan_mdb_entry_put(struct vxlan_dev *vxlan, + struct vxlan_mdb_entry *mdb_entry) +{ + if (!list_empty(&mdb_entry->remotes)) + return; + + rhashtable_remove_fast(&vxlan->mdb_tbl, &mdb_entry->rhnode, + vxlan_mdb_rht_params); + hlist_del(&mdb_entry->mdb_node); + kfree_rcu(mdb_entry, rcu); +} + +static int __vxlan_mdb_add(const struct vxlan_mdb_config *cfg, + struct netlink_ext_ack *extack) +{ + struct vxlan_dev *vxlan = cfg->vxlan; + struct vxlan_mdb_entry *mdb_entry; + int err; + + mdb_entry = vxlan_mdb_entry_get(vxlan, &cfg->group); + if (IS_ERR(mdb_entry)) + return PTR_ERR(mdb_entry); + + err = vxlan_mdb_remote_add(cfg, mdb_entry, extack); + if (err) + goto err_entry_put; + + vxlan->mdb_seq++; + + return 0; + +err_entry_put: + vxlan_mdb_entry_put(vxlan, mdb_entry); + return err; +} + +static int __vxlan_mdb_del(const struct vxlan_mdb_config *cfg, + struct netlink_ext_ack *extack) +{ + struct vxlan_dev *vxlan = cfg->vxlan; + struct vxlan_mdb_entry *mdb_entry; + struct vxlan_mdb_remote *remote; + + mdb_entry = vxlan_mdb_entry_lookup(vxlan, &cfg->group); + if (!mdb_entry) { + NL_SET_ERR_MSG_MOD(extack, "Did not find MDB entry"); + return -ENOENT; + } + + remote = vxlan_mdb_remote_lookup(mdb_entry, &cfg->remote_ip); + if (!remote) { + NL_SET_ERR_MSG_MOD(extack, "Did not find MDB remote entry"); + return -ENOENT; + } + + vxlan_mdb_remote_del(vxlan, mdb_entry, remote); + vxlan_mdb_entry_put(vxlan, mdb_entry); + + vxlan->mdb_seq++; + + return 0; +} + +int vxlan_mdb_add(struct 
net_device *dev, struct nlattr *tb[], u16 nlmsg_flags, + struct netlink_ext_ack *extack) +{ + struct vxlan_mdb_config cfg; + int err; + + ASSERT_RTNL(); + + err = vxlan_mdb_config_init(&cfg, dev, tb, nlmsg_flags, extack); + if (err) + return err; + + err = __vxlan_mdb_add(&cfg, extack); + + vxlan_mdb_config_fini(&cfg); + return err; +} + +int vxlan_mdb_del(struct net_device *dev, struct nlattr *tb[], + struct netlink_ext_ack *extack) +{ + struct vxlan_mdb_config cfg; + int err; + + ASSERT_RTNL(); + + err = vxlan_mdb_config_init(&cfg, dev, tb, 0, extack); + if (err) + return err; + + err = __vxlan_mdb_del(&cfg, extack); + + vxlan_mdb_config_fini(&cfg); + return err; +} + +static void vxlan_mdb_check_empty(void *ptr, void *arg) +{ + WARN_ON_ONCE(1); +} + +static void vxlan_mdb_remotes_flush(struct vxlan_dev *vxlan, + struct vxlan_mdb_entry *mdb_entry) +{ + struct vxlan_mdb_remote *remote, *tmp; + + list_for_each_entry_safe(remote, tmp, &mdb_entry->remotes, list) + vxlan_mdb_remote_del(vxlan, mdb_entry, remote); +} + +static void vxlan_mdb_entries_flush(struct vxlan_dev *vxlan) +{ + struct vxlan_mdb_entry *mdb_entry; + struct hlist_node *tmp; + + /* The removal of an entry cannot trigger the removal of another entry + * since entries are always added to the head of the list. + */ + hlist_for_each_entry_safe(mdb_entry, tmp, &vxlan->mdb_list, mdb_node) { + vxlan_mdb_remotes_flush(vxlan, mdb_entry); + vxlan_mdb_entry_put(vxlan, mdb_entry); + } +} + +int vxlan_mdb_init(struct vxlan_dev *vxlan) +{ + int err; + + err = rhashtable_init(&vxlan->mdb_tbl, &vxlan_mdb_rht_params); + if (err) + return err; + + INIT_HLIST_HEAD(&vxlan->mdb_list); + + return 0; +} + +void vxlan_mdb_fini(struct vxlan_dev *vxlan) +{ + vxlan_mdb_entries_flush(vxlan); + rhashtable_free_and_destroy(&vxlan->mdb_tbl, vxlan_mdb_check_empty, + NULL); +} diff --git a/drivers/net/vxlan/vxlan_private.h b/drivers/net/vxlan/vxlan_private.h index f4977925cb8a4..7bcc38faae274 100644 --- a/drivers/net/vxlan/vxlan_private.h +++ b/drivers/net/vxlan/vxlan_private.h @@ -110,6 +110,14 @@ static inline int vxlan_nla_put_addr(struct sk_buff *skb, int attr, return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr); } +static inline bool vxlan_addr_is_multicast(const union vxlan_addr *ip) +{ + if (ip->sa.sa_family == AF_INET6) + return ipv6_addr_is_multicast(&ip->sin6.sin6_addr); + else + return ipv4_is_multicast(ip->sin.sin_addr.s_addr); +} + #else /* !CONFIG_IPV6 */ static inline @@ -138,8 +146,21 @@ static inline int vxlan_nla_put_addr(struct sk_buff *skb, int attr, return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr); } +static inline bool vxlan_addr_is_multicast(const union vxlan_addr *ip) +{ + return ipv4_is_multicast(ip->sin.sin_addr.s_addr); +} + #endif +static inline size_t vxlan_addr_size(const union vxlan_addr *ip) +{ + if (ip->sa.sa_family == AF_INET6) + return sizeof(struct in6_addr); + else + return sizeof(__be32); +} + static inline struct vxlan_vni_node * vxlan_vnifilter_lookup(struct vxlan_dev *vxlan, __be32 vni) { @@ -206,4 +227,14 @@ int vxlan_igmp_join(struct vxlan_dev *vxlan, union vxlan_addr *rip, int rifindex); int vxlan_igmp_leave(struct vxlan_dev *vxlan, union vxlan_addr *rip, int rifindex); + +/* vxlan_mdb.c */ +int vxlan_mdb_dump(struct net_device *dev, struct sk_buff *skb, + struct netlink_callback *cb); +int vxlan_mdb_add(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags, + struct netlink_ext_ack *extack); +int vxlan_mdb_del(struct net_device *dev, struct nlattr *tb[], + struct netlink_ext_ack *extack); 
+int vxlan_mdb_init(struct vxlan_dev *vxlan); +void vxlan_mdb_fini(struct vxlan_dev *vxlan); #endif diff --git a/include/net/vxlan.h b/include/net/vxlan.h index bca5b01af2475..110b703d89782 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -3,6 +3,7 @@ #define __NET_VXLAN_H 1 #include +#include #include #include #include @@ -302,6 +303,10 @@ struct vxlan_dev { struct vxlan_vni_group __rcu *vnigrp; struct hlist_head fdb_head[FDB_HASH_SIZE]; + + struct rhashtable mdb_tbl; + struct hlist_head mdb_list; + unsigned int mdb_seq; }; #define VXLAN_F_LEARN 0x01 diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index d60c456710b3e..c9d624f528c59 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -633,6 +633,11 @@ enum { MDBA_MDB_EATTR_GROUP_MODE, MDBA_MDB_EATTR_SOURCE, MDBA_MDB_EATTR_RTPROT, + MDBA_MDB_EATTR_DST, + MDBA_MDB_EATTR_DST_PORT, + MDBA_MDB_EATTR_VNI, + MDBA_MDB_EATTR_IFINDEX, + MDBA_MDB_EATTR_SRC_VNI, __MDBA_MDB_EATTR_MAX }; #define MDBA_MDB_EATTR_MAX (__MDBA_MDB_EATTR_MAX - 1) @@ -728,6 +733,11 @@ enum { MDBE_ATTR_SRC_LIST, MDBE_ATTR_GROUP_MODE, MDBE_ATTR_RTPROT, + MDBE_ATTR_DST, + MDBE_ATTR_DST_PORT, + MDBE_ATTR_VNI, + MDBE_ATTR_IFINDEX, + MDBE_ATTR_SRC_VNI, __MDBE_ATTR_MAX, }; #define MDBE_ATTR_MAX (__MDBE_ATTR_MAX - 1) -- GitLab From bc6c6b013ffee36eb555cc0a68aa3d9608e1fad2 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 15 Mar 2023 15:11:52 +0200 Subject: [PATCH 0532/2078] vxlan: mdb: Add an internal flag to indicate MDB usage Add an internal flag to indicate whether MDB entries are configured or not. Set the flag after installing the first MDB entry and clear it before deleting the last one. The flag will be consulted by the data path which will only perform an MDB lookup if the flag is set, thereby keeping the MDB overhead to a minimum when the MDB is not used. Another option would have been to use a static key, but it is global and not per-device, unlike the current approach. Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- drivers/net/vxlan/vxlan_mdb.c | 7 +++++++ include/net/vxlan.h | 1 + 2 files changed, 8 insertions(+) diff --git a/drivers/net/vxlan/vxlan_mdb.c b/drivers/net/vxlan/vxlan_mdb.c index 129692b3663ff..b32b1fb4a74a2 100644 --- a/drivers/net/vxlan/vxlan_mdb.c +++ b/drivers/net/vxlan/vxlan_mdb.c @@ -1185,6 +1185,9 @@ vxlan_mdb_entry_get(struct vxlan_dev *vxlan, if (err) goto err_free_entry; + if (hlist_is_singular_node(&mdb_entry->mdb_node, &vxlan->mdb_list)) + vxlan->cfg.flags |= VXLAN_F_MDB; + return mdb_entry; err_free_entry: @@ -1199,6 +1202,9 @@ static void vxlan_mdb_entry_put(struct vxlan_dev *vxlan, if (!list_empty(&mdb_entry->remotes)) return; + if (hlist_is_singular_node(&mdb_entry->mdb_node, &vxlan->mdb_list)) + vxlan->cfg.flags &= ~VXLAN_F_MDB; + rhashtable_remove_fast(&vxlan->mdb_tbl, &mdb_entry->rhnode, vxlan_mdb_rht_params); hlist_del(&mdb_entry->mdb_node); @@ -1336,6 +1342,7 @@ int vxlan_mdb_init(struct vxlan_dev *vxlan) void vxlan_mdb_fini(struct vxlan_dev *vxlan) { vxlan_mdb_entries_flush(vxlan); + WARN_ON_ONCE(vxlan->cfg.flags & VXLAN_F_MDB); rhashtable_free_and_destroy(&vxlan->mdb_tbl, vxlan_mdb_check_empty, NULL); } diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 110b703d89782..b7b2e9abfb373 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -327,6 +327,7 @@ struct vxlan_dev { #define VXLAN_F_IPV6_LINKLOCAL 0x8000 #define VXLAN_F_TTL_INHERIT 0x10000 #define VXLAN_F_VNIFILTER 0x20000 +#define VXLAN_F_MDB 0x40000 /* Flags that are used in the receive path. These flags must match in * order for a socket to be shareable -- GitLab From 0f83e69f44bf8dc8ab48ff0196b3475c1f0f6c07 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 15 Mar 2023 15:11:53 +0200 Subject: [PATCH 0533/2078] vxlan: Add MDB data path support Integrate MDB support into the Tx path of the VXLAN driver, allowing it to selectively forward IP multicast traffic according to the matched MDB entry. If MDB entries are configured (i.e., 'VXLAN_F_MDB' is set) and the packet is an IP multicast packet, perform up to three different lookups according to the following priority: 1. For an (S, G) entry, using {Source VNI, Source IP, Destination IP}. 2. For a (*, G) entry, using {Source VNI, Destination IP}. 3. For the catchall MDB entry (0.0.0.0 or ::), using the source VNI. The catchall MDB entry is similar to the catchall FDB entry (00:00:00:00:00:00) that is currently used to transmit BUM (broadcast, unknown unicast and multicast) traffic. However, unlike the catchall FDB entry, this entry is only used to transmit unregistered IP multicast traffic that is not link-local. Therefore, when configured, the catchall FDB entry will only transmit BULL (broadcast, unknown unicast, link-local multicast) traffic. The catchall MDB entry is useful in deployments where inter-subnet multicast forwarding is used and not all the VTEPs in a tenant domain are members in all the broadcast domains. In such deployments it is advantageous to transmit BULL (broadcast, unknown unicast and link-local multicast) and unregistered IP multicast traffic on different tunnels. If the same tunnel was used, a VTEP only interested in IP multicast traffic would also pull all the BULL traffic and drop it as it is not a member in the originating broadcast domain [1]. If the packet did not match an MDB entry (or if the packet is not an IP multicast packet), return it to the Tx path, allowing it to be forwarded according to the FDB. If the packet did match an MDB entry, forward it to the associated remote VTEPs. 
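For example (an illustrative sketch rather than a description of a specific deployment, reusing the group, source and VNI values that appear in the accompanying selftest), with the following two entries configured:

 # bridge mdb add dev vx0 port vx0 grp 239.1.1.1 src 192.0.2.129 permanent dst 198.51.100.1 src_vni 10010
 # bridge mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10010

an overlay packet sent to 239.1.1.1 from 192.0.2.129 on VNI 10010 matches the first, (S, G), entry and is forwarded to 198.51.100.1; a packet to 239.1.1.1 from any other source matches the second, (*, G), entry and is forwarded to 198.51.100.2; and a packet to an unregistered, non-link-local group falls back to the catchall (0.0.0.0 or ::) MDB entry if one is configured, and otherwise to regular FDB forwarding.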
However, if the entry is a (*, G) entry and the associated remote is in INCLUDE mode, then skip over it as the source IP is not in its source list (otherwise the packet would have matched on an (S, G) entry). Similarly, if the associated remote is marked as BLOCKED (can only be set on (S, G) entries), then skip over it as well as the remote is in EXCLUDE mode and the source IP is in its source list. [1] https://datatracker.ietf.org/doc/html/draft-ietf-bess-evpn-irb-mcast#section-2.6 Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- drivers/net/vxlan/vxlan_core.c | 15 ++++ drivers/net/vxlan/vxlan_mdb.c | 114 ++++++++++++++++++++++++++++++ drivers/net/vxlan/vxlan_private.h | 6 ++ 3 files changed, 135 insertions(+) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index a8b26d4f76de7..8450768d23004 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2743,6 +2743,21 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) #endif } + if (vxlan->cfg.flags & VXLAN_F_MDB) { + struct vxlan_mdb_entry *mdb_entry; + + rcu_read_lock(); + mdb_entry = vxlan_mdb_entry_skb_get(vxlan, skb, vni); + if (mdb_entry) { + netdev_tx_t ret; + + ret = vxlan_mdb_xmit(vxlan, mdb_entry, skb); + rcu_read_unlock(); + return ret; + } + rcu_read_unlock(); + } + eth = eth_hdr(skb); f = vxlan_find_mac(vxlan, eth->h_dest, vni); did_rsc = false; diff --git a/drivers/net/vxlan/vxlan_mdb.c b/drivers/net/vxlan/vxlan_mdb.c index b32b1fb4a74a2..5e041622261a5 100644 --- a/drivers/net/vxlan/vxlan_mdb.c +++ b/drivers/net/vxlan/vxlan_mdb.c @@ -1298,6 +1298,120 @@ int vxlan_mdb_del(struct net_device *dev, struct nlattr *tb[], return err; } +struct vxlan_mdb_entry *vxlan_mdb_entry_skb_get(struct vxlan_dev *vxlan, + struct sk_buff *skb, + __be32 src_vni) +{ + struct vxlan_mdb_entry *mdb_entry; + struct vxlan_mdb_entry_key group; + + if (!is_multicast_ether_addr(eth_hdr(skb)->h_dest) || + is_broadcast_ether_addr(eth_hdr(skb)->h_dest)) + return NULL; + + /* When not in collect metadata mode, 'src_vni' is zero, but MDB + * entries are stored with the VNI of the VXLAN device. + */ + if (!(vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA)) + src_vni = vxlan->default_dst.remote_vni; + + memset(&group, 0, sizeof(group)); + group.vni = src_vni; + + switch (skb->protocol) { + case htons(ETH_P_IP): + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + return NULL; + group.dst.sa.sa_family = AF_INET; + group.dst.sin.sin_addr.s_addr = ip_hdr(skb)->daddr; + group.src.sa.sa_family = AF_INET; + group.src.sin.sin_addr.s_addr = ip_hdr(skb)->saddr; + break; +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + return NULL; + group.dst.sa.sa_family = AF_INET6; + group.dst.sin6.sin6_addr = ipv6_hdr(skb)->daddr; + group.src.sa.sa_family = AF_INET6; + group.src.sin6.sin6_addr = ipv6_hdr(skb)->saddr; + break; +#endif + default: + return NULL; + } + + mdb_entry = vxlan_mdb_entry_lookup(vxlan, &group); + if (mdb_entry) + return mdb_entry; + + memset(&group.src, 0, sizeof(group.src)); + mdb_entry = vxlan_mdb_entry_lookup(vxlan, &group); + if (mdb_entry) + return mdb_entry; + + /* No (S, G) or (*, G) found. Look up the all-zeros entry, but only if + * the destination IP address is not link-local multicast since we want + * to transmit such traffic together with broadcast and unknown unicast + * traffic. 
+ */ + switch (skb->protocol) { + case htons(ETH_P_IP): + if (ipv4_is_local_multicast(group.dst.sin.sin_addr.s_addr)) + return NULL; + group.dst.sin.sin_addr.s_addr = 0; + break; +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + if (ipv6_addr_type(&group.dst.sin6.sin6_addr) & + IPV6_ADDR_LINKLOCAL) + return NULL; + memset(&group.dst.sin6.sin6_addr, 0, + sizeof(group.dst.sin6.sin6_addr)); + break; +#endif + default: + return NULL; + } + + return vxlan_mdb_entry_lookup(vxlan, &group); +} + +netdev_tx_t vxlan_mdb_xmit(struct vxlan_dev *vxlan, + const struct vxlan_mdb_entry *mdb_entry, + struct sk_buff *skb) +{ + struct vxlan_mdb_remote *remote, *fremote = NULL; + __be32 src_vni = mdb_entry->key.vni; + + list_for_each_entry_rcu(remote, &mdb_entry->remotes, list) { + struct sk_buff *skb1; + + if ((vxlan_mdb_is_star_g(&mdb_entry->key) && + READ_ONCE(remote->filter_mode) == MCAST_INCLUDE) || + (READ_ONCE(remote->flags) & VXLAN_MDB_REMOTE_F_BLOCKED)) + continue; + + if (!fremote) { + fremote = remote; + continue; + } + + skb1 = skb_clone(skb, GFP_ATOMIC); + if (skb1) + vxlan_xmit_one(skb1, vxlan->dev, src_vni, + rcu_dereference(remote->rd), false); + } + + if (fremote) + vxlan_xmit_one(skb, vxlan->dev, src_vni, + rcu_dereference(fremote->rd), false); + else + kfree_skb(skb); + + return NETDEV_TX_OK; +} + static void vxlan_mdb_check_empty(void *ptr, void *arg) { WARN_ON_ONCE(1); diff --git a/drivers/net/vxlan/vxlan_private.h b/drivers/net/vxlan/vxlan_private.h index 7bcc38faae274..817fa3075842e 100644 --- a/drivers/net/vxlan/vxlan_private.h +++ b/drivers/net/vxlan/vxlan_private.h @@ -235,6 +235,12 @@ int vxlan_mdb_add(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags, struct netlink_ext_ack *extack); int vxlan_mdb_del(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack); +struct vxlan_mdb_entry *vxlan_mdb_entry_skb_get(struct vxlan_dev *vxlan, + struct sk_buff *skb, + __be32 src_vni); +netdev_tx_t vxlan_mdb_xmit(struct vxlan_dev *vxlan, + const struct vxlan_mdb_entry *mdb_entry, + struct sk_buff *skb); int vxlan_mdb_init(struct vxlan_dev *vxlan); void vxlan_mdb_fini(struct vxlan_dev *vxlan); #endif -- GitLab From 08f876a7d79ed235f90af0373d1e548a71c1f4f6 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 15 Mar 2023 15:11:54 +0200 Subject: [PATCH 0534/2078] vxlan: Enable MDB support Now that the VXLAN MDB control and data paths are in place we can expose the VXLAN MDB functionality to user space. Set the VXLAN MDB net device operations to the appropriate functions, thereby allowing the rtnetlink code to reach the VXLAN driver. Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- drivers/net/vxlan/vxlan_core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 8450768d23004..e2e5f5dac7e63 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -3083,6 +3083,9 @@ static const struct net_device_ops vxlan_netdev_ether_ops = { .ndo_fdb_del = vxlan_fdb_delete, .ndo_fdb_dump = vxlan_fdb_dump, .ndo_fdb_get = vxlan_fdb_get, + .ndo_mdb_add = vxlan_mdb_add, + .ndo_mdb_del = vxlan_mdb_del, + .ndo_mdb_dump = vxlan_mdb_dump, .ndo_fill_metadata_dst = vxlan_fill_metadata_dst, }; -- GitLab From 62199e3f16583e766f46d1767deca109fd8ea408 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 15 Mar 2023 15:11:55 +0200 Subject: [PATCH 0535/2078] selftests: net: Add VXLAN MDB test Add test cases for VXLAN MDB, testing the control and data paths. Two different sets of namespaces (i.e., ns{1,2}_v4 and ns{1,2}_v6) are used in order to test VXLAN MDB with both IPv4 and IPv6 underlays, respectively. Example truncated output: # ./test_vxlan_mdb.sh [...] Tests passed: 620 Tests failed: 0 Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/config | 1 + tools/testing/selftests/net/test_vxlan_mdb.sh | 2318 +++++++++++++++++ 3 files changed, 2320 insertions(+) create mode 100755 tools/testing/selftests/net/test_vxlan_mdb.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 099741290184e..a179fbd6f9721 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -81,6 +81,7 @@ TEST_GEN_FILES += sctp_hello TEST_GEN_FILES += csum TEST_GEN_FILES += nat6to4.o TEST_GEN_FILES += ip_local_port_range +TEST_PROGS += test_vxlan_mdb.sh TEST_FILES := settings diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index cc9fd55ab8699..4c7ce07afa2f9 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -48,3 +48,4 @@ CONFIG_BAREUDP=m CONFIG_IPV6_IOAM6_LWTUNNEL=y CONFIG_CRYPTO_SM4_GENERIC=y CONFIG_AMT=m +CONFIG_VXLAN=m diff --git a/tools/testing/selftests/net/test_vxlan_mdb.sh b/tools/testing/selftests/net/test_vxlan_mdb.sh new file mode 100755 index 0000000000000..31e5f0f8859d1 --- /dev/null +++ b/tools/testing/selftests/net/test_vxlan_mdb.sh @@ -0,0 +1,2318 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test is for checking VXLAN MDB functionality. The topology consists of +# two sets of namespaces: One for the testing of IPv4 underlay and another for +# IPv6. In both cases, both IPv4 and IPv6 overlay traffic are tested. +# +# Data path functionality is tested by sending traffic from one of the upper +# namespaces and checking using ingress tc filters that the expected traffic +# was received by one of the lower namespaces. 
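+#
+# A single data path check follows roughly this pattern (an illustrative
+# sketch built from the helpers defined later in this file; the tc filter
+# handle and expected packet count are made-up values):
+#
+#   run_cmd "bridge -n ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 192.0.2.2 src_vni 10010"
+#   # ... send a packet to 239.1.1.1 from one of the upper namespaces ...
+#   tc_check_packets "ns2_v4" "dev vx0 ingress" 101 1
+#   log_test $? 0 "Packet received by the remote VTEP"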
+# +# +------------------------------------+ +------------------------------------+ +# | ns1_v4 | | ns1_v6 | +# | | | | +# | br0.10 br0.4000 br0.20 | | br0.10 br0.4000 br0.20 | +# | + + + | | + + + | +# | | | | | | | | | | +# | | | | | | | | | | +# | +---------+---------+ | | +---------+---------+ | +# | | | | | | +# | | | | | | +# | + | | + | +# | br0 | | br0 | +# | + | | + | +# | | | | | | +# | | | | | | +# | + | | + | +# | vx0 | | vx0 | +# | | | | +# | | | | +# | veth0 | | veth0 | +# | + | | + | +# +-----------------|------------------+ +-----------------|------------------+ +# | | +# +-----------------|------------------+ +-----------------|------------------+ +# | + | | + | +# | veth0 | | veth0 | +# | | | | +# | | | | +# | vx0 | | vx0 | +# | + | | + | +# | | | | | | +# | | | | | | +# | + | | + | +# | br0 | | br0 | +# | + | | + | +# | | | | | | +# | | | | | | +# | +---------+---------+ | | +---------+---------+ | +# | | | | | | | | | | +# | | | | | | | | | | +# | + + + | | + + + | +# | br0.10 br0.4000 br0.10 | | br0.10 br0.4000 br0.20 | +# | | | | +# | ns2_v4 | | ns2_v6 | +# +------------------------------------+ +------------------------------------+ + +ret=0 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +CONTROL_PATH_TESTS=" + basic_star_g_ipv4_ipv4 + basic_star_g_ipv6_ipv4 + basic_star_g_ipv4_ipv6 + basic_star_g_ipv6_ipv6 + basic_sg_ipv4_ipv4 + basic_sg_ipv6_ipv4 + basic_sg_ipv4_ipv6 + basic_sg_ipv6_ipv6 + star_g_ipv4_ipv4 + star_g_ipv6_ipv4 + star_g_ipv4_ipv6 + star_g_ipv6_ipv6 + sg_ipv4_ipv4 + sg_ipv6_ipv4 + sg_ipv4_ipv6 + sg_ipv6_ipv6 + dump_ipv4_ipv4 + dump_ipv6_ipv4 + dump_ipv4_ipv6 + dump_ipv6_ipv6 +" + +DATA_PATH_TESTS=" + encap_params_ipv4_ipv4 + encap_params_ipv6_ipv4 + encap_params_ipv4_ipv6 + encap_params_ipv6_ipv6 + starg_exclude_ir_ipv4_ipv4 + starg_exclude_ir_ipv6_ipv4 + starg_exclude_ir_ipv4_ipv6 + starg_exclude_ir_ipv6_ipv6 + starg_include_ir_ipv4_ipv4 + starg_include_ir_ipv6_ipv4 + starg_include_ir_ipv4_ipv6 + starg_include_ir_ipv6_ipv6 + starg_exclude_p2mp_ipv4_ipv4 + starg_exclude_p2mp_ipv6_ipv4 + starg_exclude_p2mp_ipv4_ipv6 + starg_exclude_p2mp_ipv6_ipv6 + starg_include_p2mp_ipv4_ipv4 + starg_include_p2mp_ipv6_ipv4 + starg_include_p2mp_ipv4_ipv6 + starg_include_p2mp_ipv6_ipv6 + egress_vni_translation_ipv4_ipv4 + egress_vni_translation_ipv6_ipv4 + egress_vni_translation_ipv4_ipv6 + egress_vni_translation_ipv6_ipv6 + all_zeros_mdb_ipv4 + all_zeros_mdb_ipv6 + mdb_fdb_ipv4_ipv4 + mdb_fdb_ipv6_ipv4 + mdb_fdb_ipv4_ipv6 + mdb_fdb_ipv6_ipv6 + mdb_torture_ipv4_ipv4 + mdb_torture_ipv6_ipv4 + mdb_torture_ipv4_ipv6 + mdb_torture_ipv6_ipv6 +" + +# All tests in this script. Can be overridden with -t option. 
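+#
+# For example, to run only the basic IPv4-underlay control path suites (an
+# illustrative invocation of the -t option mentioned above):
+#
+#   ./test_vxlan_mdb.sh -t "basic_star_g_ipv4_ipv4 basic_sg_ipv4_ipv4"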
+TESTS=" + $CONTROL_PATH_TESTS + $DATA_PATH_TESTS +" +VERBOSE=0 +PAUSE_ON_FAIL=no +PAUSE=no + +################################################################################ +# Utilities + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + printf "TEST: %-60s [ OK ]\n" "${msg}" + nsuccess=$((nsuccess+1)) + else + ret=1 + nfail=$((nfail+1)) + printf "TEST: %-60s [FAIL]\n" "${msg}" + if [ "$VERBOSE" = "1" ]; then + echo " rc=$rc, expected $expected" + fi + + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi + + if [ "${PAUSE}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + + [ "$VERBOSE" = "1" ] && echo +} + +run_cmd() +{ + local cmd="$1" + local out + local stderr="2>/dev/null" + + if [ "$VERBOSE" = "1" ]; then + printf "COMMAND: $cmd\n" + stderr= + fi + + out=$(eval $cmd $stderr) + rc=$? + if [ "$VERBOSE" = "1" -a -n "$out" ]; then + echo " $out" + fi + + return $rc +} + +tc_check_packets() +{ + local ns=$1; shift + local id=$1; shift + local handle=$1; shift + local count=$1; shift + local pkts + + sleep 0.1 + pkts=$(tc -n $ns -j -s filter show $id \ + | jq ".[] | select(.options.handle == $handle) | \ + .options.actions[0].stats.packets") + [[ $pkts == $count ]] +} + +################################################################################ +# Setup + +setup_common_ns() +{ + local ns=$1; shift + local local_addr=$1; shift + + ip netns exec $ns sysctl -qw net.ipv4.ip_forward=1 + ip netns exec $ns sysctl -qw net.ipv4.fib_multipath_use_neigh=1 + ip netns exec $ns sysctl -qw net.ipv4.conf.default.ignore_routes_with_linkdown=1 + ip netns exec $ns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 + ip netns exec $ns sysctl -qw net.ipv6.conf.all.forwarding=1 + ip netns exec $ns sysctl -qw net.ipv6.conf.default.forwarding=1 + ip netns exec $ns sysctl -qw net.ipv6.conf.default.ignore_routes_with_linkdown=1 + ip netns exec $ns sysctl -qw net.ipv6.conf.all.accept_dad=0 + ip netns exec $ns sysctl -qw net.ipv6.conf.default.accept_dad=0 + + ip -n $ns link set dev lo up + ip -n $ns address add $local_addr dev lo + + ip -n $ns link set dev veth0 up + + ip -n $ns link add name br0 up type bridge vlan_filtering 1 \ + vlan_default_pvid 0 mcast_snooping 0 + + ip -n $ns link add link br0 name br0.10 up type vlan id 10 + bridge -n $ns vlan add vid 10 dev br0 self + + ip -n $ns link add link br0 name br0.20 up type vlan id 20 + bridge -n $ns vlan add vid 20 dev br0 self + + ip -n $ns link add link br0 name br0.4000 up type vlan id 4000 + bridge -n $ns vlan add vid 4000 dev br0 self + + ip -n $ns link add name vx0 up master br0 type vxlan \ + local $local_addr dstport 4789 external vnifilter + bridge -n $ns link set dev vx0 vlan_tunnel on + + bridge -n $ns vlan add vid 10 dev vx0 + bridge -n $ns vlan add vid 10 dev vx0 tunnel_info id 10010 + bridge -n $ns vni add vni 10010 dev vx0 + + bridge -n $ns vlan add vid 20 dev vx0 + bridge -n $ns vlan add vid 20 dev vx0 tunnel_info id 10020 + bridge -n $ns vni add vni 10020 dev vx0 + + bridge -n $ns vlan add vid 4000 dev vx0 pvid + bridge -n $ns vlan add vid 4000 dev vx0 tunnel_info id 14000 + bridge -n $ns vni add vni 14000 dev vx0 +} + +setup_common() +{ + local ns1=$1; shift + local ns2=$1; shift + local local_addr1=$1; shift + local local_addr2=$1; shift + + ip netns add $ns1 + ip netns add $ns2 + + ip link add name veth0 type veth peer name veth1 + ip 
link set dev veth0 netns $ns1 name veth0 + ip link set dev veth1 netns $ns2 name veth0 + + setup_common_ns $ns1 $local_addr1 + setup_common_ns $ns2 $local_addr2 +} + +setup_v4() +{ + setup_common ns1_v4 ns2_v4 192.0.2.1 192.0.2.2 + + ip -n ns1_v4 address add 192.0.2.17/28 dev veth0 + ip -n ns2_v4 address add 192.0.2.18/28 dev veth0 + + ip -n ns1_v4 route add default via 192.0.2.18 + ip -n ns2_v4 route add default via 192.0.2.17 +} + +cleanup_v4() +{ + ip netns del ns2_v4 + ip netns del ns1_v4 +} + +setup_v6() +{ + setup_common ns1_v6 ns2_v6 2001:db8:1::1 2001:db8:1::2 + + ip -n ns1_v6 address add 2001:db8:2::1/64 dev veth0 nodad + ip -n ns2_v6 address add 2001:db8:2::2/64 dev veth0 nodad + + ip -n ns1_v6 route add default via 2001:db8:2::2 + ip -n ns2_v6 route add default via 2001:db8:2::1 +} + +cleanup_v6() +{ + ip netns del ns2_v6 + ip netns del ns1_v6 +} + +setup() +{ + set -e + + setup_v4 + setup_v6 + + sleep 5 + + set +e +} + +cleanup() +{ + cleanup_v6 &> /dev/null + cleanup_v4 &> /dev/null +} + +################################################################################ +# Tests - Control path + +basic_common() +{ + local ns1=$1; shift + local grp_key=$1; shift + local vtep_ip=$1; shift + + # Test basic control path operations common to all MDB entry types. + + # Basic add, replace and delete behavior. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" + log_test $? 0 "MDB entry addition" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\"" + log_test $? 0 "MDB entry presence after addition" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" + log_test $? 0 "MDB entry replacement" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\"" + log_test $? 0 "MDB entry presence after replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010" + log_test $? 0 "MDB entry deletion" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\"" + log_test $? 1 "MDB entry presence after deletion" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010" + log_test $? 255 "Non-existent MDB entry deletion" + + # Default protocol and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \"proto static\"" + log_test $? 0 "MDB entry default protocol" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent proto 123 dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \"proto 123\"" + log_test $? 0 "MDB entry protocol replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010" + + # Default destination port and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \" dst_port \"" + log_test $? 1 "MDB entry default destination port" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip dst_port 1234 src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \"dst_port 1234\"" + log_test $? 
0 "MDB entry destination port replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010" + + # Default destination VNI and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \" vni \"" + log_test $? 1 "MDB entry default destination VNI" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip vni 1234 src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \"vni 1234\"" + log_test $? 0 "MDB entry destination VNI replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010" + + # Default outgoing interface and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \" via \"" + log_test $? 1 "MDB entry default outgoing interface" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010 via veth0" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \"via veth0\"" + log_test $? 0 "MDB entry outgoing interface replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010" + + # Common error cases. + run_cmd "bridge -n $ns1 mdb add dev vx0 port veth0 $grp_key permanent dst $vtep_ip src_vni 10010" + log_test $? 255 "MDB entry with mismatch between device and port" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key temp dst $vtep_ip src_vni 10010" + log_test $? 255 "MDB entry with temp state" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent vid 10 dst $vtep_ip src_vni 10010" + log_test $? 255 "MDB entry with VLAN" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp 01:02:03:04:05:06 permanent dst $vtep_ip src_vni 10010" + log_test $? 255 "MDB entry MAC address" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent" + log_test $? 255 "MDB entry without extended parameters" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent proto 3 dst $vtep_ip src_vni 10010" + log_test $? 255 "MDB entry with an invalid protocol" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip vni $((2 ** 24)) src_vni 10010" + log_test $? 255 "MDB entry with an invalid destination VNI" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni $((2 ** 24))" + log_test $? 255 "MDB entry with an invalid source VNI" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent src_vni 10010" + log_test $? 255 "MDB entry without a remote destination IP" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" + log_test $? 
255 "Duplicate MDB entries" + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010" +} + +basic_star_g_ipv4_ipv4() +{ + local ns1=ns1_v4 + local grp_key="grp 239.1.1.1" + local vtep_ip=198.51.100.100 + + echo + echo "Control path: Basic (*, G) operations - IPv4 overlay / IPv4 underlay" + echo "--------------------------------------------------------------------" + + basic_common $ns1 "$grp_key" $vtep_ip +} + +basic_star_g_ipv6_ipv4() +{ + local ns1=ns1_v4 + local grp_key="grp ff0e::1" + local vtep_ip=198.51.100.100 + + echo + echo "Control path: Basic (*, G) operations - IPv6 overlay / IPv4 underlay" + echo "--------------------------------------------------------------------" + + basic_common $ns1 "$grp_key" $vtep_ip +} + +basic_star_g_ipv4_ipv6() +{ + local ns1=ns1_v6 + local grp_key="grp 239.1.1.1" + local vtep_ip=2001:db8:1000::1 + + echo + echo "Control path: Basic (*, G) operations - IPv4 overlay / IPv6 underlay" + echo "--------------------------------------------------------------------" + + basic_common $ns1 "$grp_key" $vtep_ip +} + +basic_star_g_ipv6_ipv6() +{ + local ns1=ns1_v6 + local grp_key="grp ff0e::1" + local vtep_ip=2001:db8:1000::1 + + echo + echo "Control path: Basic (*, G) operations - IPv6 overlay / IPv6 underlay" + echo "--------------------------------------------------------------------" + + basic_common $ns1 "$grp_key" $vtep_ip +} + +basic_sg_ipv4_ipv4() +{ + local ns1=ns1_v4 + local grp_key="grp 239.1.1.1 src 192.0.2.129" + local vtep_ip=198.51.100.100 + + echo + echo "Control path: Basic (S, G) operations - IPv4 overlay / IPv4 underlay" + echo "--------------------------------------------------------------------" + + basic_common $ns1 "$grp_key" $vtep_ip +} + +basic_sg_ipv6_ipv4() +{ + local ns1=ns1_v4 + local grp_key="grp ff0e::1 src 2001:db8:100::1" + local vtep_ip=198.51.100.100 + + echo + echo "Control path: Basic (S, G) operations - IPv6 overlay / IPv4 underlay" + echo "---------------------------------------------------------------------" + + basic_common $ns1 "$grp_key" $vtep_ip +} + +basic_sg_ipv4_ipv6() +{ + local ns1=ns1_v6 + local grp_key="grp 239.1.1.1 src 192.0.2.129" + local vtep_ip=2001:db8:1000::1 + + echo + echo "Control path: Basic (S, G) operations - IPv4 overlay / IPv6 underlay" + echo "--------------------------------------------------------------------" + + basic_common $ns1 "$grp_key" $vtep_ip +} + +basic_sg_ipv6_ipv6() +{ + local ns1=ns1_v6 + local grp_key="grp ff0e::1 src 2001:db8:100::1" + local vtep_ip=2001:db8:1000::1 + + echo + echo "Control path: Basic (S, G) operations - IPv6 overlay / IPv6 underlay" + echo "--------------------------------------------------------------------" + + basic_common $ns1 "$grp_key" $vtep_ip +} + +star_g_common() +{ + local ns1=$1; shift + local grp=$1; shift + local src1=$1; shift + local src2=$1; shift + local src3=$1; shift + local vtep_ip=$1; shift + local all_zeros_grp=$1; shift + + # Test control path operations specific to (*, G) entries. + + # Basic add, replace and delete behavior. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010" + log_test $? 0 "(*, G) MDB entry addition with source list" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \"" + log_test $? 0 "(*, G) MDB entry presence after addition" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\"" + log_test $? 
0 "(S, G) MDB entry presence after addition" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010" + log_test $? 0 "(*, G) MDB entry replacement with source list" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \"" + log_test $? 0 "(*, G) MDB entry presence after replacement" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\"" + log_test $? 0 "(S, G) MDB entry presence after replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010" + log_test $? 0 "(*, G) MDB entry deletion" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \"" + log_test $? 1 "(*, G) MDB entry presence after deletion" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\"" + log_test $? 1 "(S, G) MDB entry presence after deletion" + + # Default filter mode and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep exclude" + log_test $? 0 "(*, G) MDB entry default filter mode" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $src1 dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep include" + log_test $? 0 "(*, G) MDB entry after replacing filter mode to \"include\"" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\"" + log_test $? 0 "(S, G) MDB entry after replacing filter mode to \"include\"" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\" | grep blocked" + log_test $? 1 "\"blocked\" flag after replacing filter mode to \"include\"" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep exclude" + log_test $? 0 "(*, G) MDB entry after replacing filter mode to \"exclude\"" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\"" + log_test $? 0 "(S, G) MDB entry after replacing filter mode to \"exclude\"" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\" | grep blocked" + log_test $? 0 "\"blocked\" flag after replacing filter mode to \"exclude\"" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010" + + # Default source list and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep source_list" + log_test $? 1 "(*, G) MDB entry default source list" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1,$src2,$src3 dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\"" + log_test $? 0 "(S, G) MDB entry of 1st source after replacing source list" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src2\"" + log_test $? 0 "(S, G) MDB entry of 2nd source after replacing source list" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src3\"" + log_test $? 
0 "(S, G) MDB entry of 3rd source after replacing source list" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1,$src3 dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\"" + log_test $? 0 "(S, G) MDB entry of 1st source after removing source" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src2\"" + log_test $? 1 "(S, G) MDB entry of 2nd source after removing source" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src3\"" + log_test $? 0 "(S, G) MDB entry of 3rd source after removing source" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010" + + # Default protocol and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \"proto static\"" + log_test $? 0 "(*, G) MDB entry default protocol" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \"proto static\"" + log_test $? 0 "(S, G) MDB entry default protocol" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 proto bgp dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \"proto bgp\"" + log_test $? 0 "(*, G) MDB entry protocol after replacement" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \"proto bgp\"" + log_test $? 0 "(S, G) MDB entry protocol after replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010" + + # Default destination port and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" dst_port \"" + log_test $? 1 "(*, G) MDB entry default destination port" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" dst_port \"" + log_test $? 1 "(S, G) MDB entry default destination port" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip dst_port 1234 src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" dst_port 1234 \"" + log_test $? 0 "(*, G) MDB entry destination port after replacement" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" dst_port 1234 \"" + log_test $? 0 "(S, G) MDB entry destination port after replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010" + + # Default destination VNI and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" vni \"" + log_test $? 1 "(*, G) MDB entry default destination VNI" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" vni \"" + log_test $? 
1 "(S, G) MDB entry default destination VNI" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip vni 1234 src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" vni 1234 \"" + log_test $? 0 "(*, G) MDB entry destination VNI after replacement" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" vni 1234 \"" + log_test $? 0 "(S, G) MDB entry destination VNI after replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010" + + # Default outgoing interface and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" via \"" + log_test $? 1 "(*, G) MDB entry default outgoing interface" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" via \"" + log_test $? 1 "(S, G) MDB entry default outgoing interface" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010 via veth0" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" via veth0 \"" + log_test $? 0 "(*, G) MDB entry outgoing interface after replacement" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" via veth0 \"" + log_test $? 0 "(S, G) MDB entry outgoing interface after replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010" + + # Error cases. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $all_zeros_grp permanent filter_mode exclude dst $vtep_ip src_vni 10010" + log_test $? 255 "All-zeros group with filter mode" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $all_zeros_grp permanent source_list $src1 dst $vtep_ip src_vni 10010" + log_test $? 255 "All-zeros group with source list" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode include dst $vtep_ip src_vni 10010" + log_test $? 255 "(*, G) INCLUDE with an empty source list" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $grp dst $vtep_ip src_vni 10010" + log_test $? 255 "Invalid source in source list" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent source_list $src1 dst $vtep_ip src_vni 10010" + log_test $? 
255 "Source list without filter mode" +} + +star_g_ipv4_ipv4() +{ + local ns1=ns1_v4 + local grp=239.1.1.1 + local src1=192.0.2.129 + local src2=192.0.2.130 + local src3=192.0.2.131 + local vtep_ip=198.51.100.100 + local all_zeros_grp=0.0.0.0 + + echo + echo "Control path: (*, G) operations - IPv4 overlay / IPv4 underlay" + echo "--------------------------------------------------------------" + + star_g_common $ns1 $grp $src1 $src2 $src3 $vtep_ip $all_zeros_grp +} + +star_g_ipv6_ipv4() +{ + local ns1=ns1_v4 + local grp=ff0e::1 + local src1=2001:db8:100::1 + local src2=2001:db8:100::2 + local src3=2001:db8:100::3 + local vtep_ip=198.51.100.100 + local all_zeros_grp=:: + + echo + echo "Control path: (*, G) operations - IPv6 overlay / IPv4 underlay" + echo "--------------------------------------------------------------" + + star_g_common $ns1 $grp $src1 $src2 $src3 $vtep_ip $all_zeros_grp +} + +star_g_ipv4_ipv6() +{ + local ns1=ns1_v6 + local grp=239.1.1.1 + local src1=192.0.2.129 + local src2=192.0.2.130 + local src3=192.0.2.131 + local vtep_ip=2001:db8:1000::1 + local all_zeros_grp=0.0.0.0 + + echo + echo "Control path: (*, G) operations - IPv4 overlay / IPv6 underlay" + echo "--------------------------------------------------------------" + + star_g_common $ns1 $grp $src1 $src2 $src3 $vtep_ip $all_zeros_grp +} + +star_g_ipv6_ipv6() +{ + local ns1=ns1_v6 + local grp=ff0e::1 + local src1=2001:db8:100::1 + local src2=2001:db8:100::2 + local src3=2001:db8:100::3 + local vtep_ip=2001:db8:1000::1 + local all_zeros_grp=:: + + echo + echo "Control path: (*, G) operations - IPv6 overlay / IPv6 underlay" + echo "--------------------------------------------------------------" + + star_g_common $ns1 $grp $src1 $src2 $src3 $vtep_ip $all_zeros_grp +} + +sg_common() +{ + local ns1=$1; shift + local grp=$1; shift + local src=$1; shift + local vtep_ip=$1; shift + local all_zeros_grp=$1; shift + + # Test control path operations specific to (S, G) entries. + + # Default filter mode. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp src $src permanent dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep include" + log_test $? 0 "(S, G) MDB entry default filter mode" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp src $src permanent dst $vtep_ip src_vni 10010" + + # Error cases. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp src $src permanent filter_mode include dst $vtep_ip src_vni 10010" + log_test $? 255 "(S, G) with filter mode" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp src $src permanent source_list $src dst $vtep_ip src_vni 10010" + log_test $? 255 "(S, G) with source list" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp src $grp permanent dst $vtep_ip src_vni 10010" + log_test $? 255 "(S, G) with an invalid source list" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $all_zeros_grp src $src permanent dst $vtep_ip src_vni 10010" + log_test $? 
255 "All-zeros group with source" +} + +sg_ipv4_ipv4() +{ + local ns1=ns1_v4 + local grp=239.1.1.1 + local src=192.0.2.129 + local vtep_ip=198.51.100.100 + local all_zeros_grp=0.0.0.0 + + echo + echo "Control path: (S, G) operations - IPv4 overlay / IPv4 underlay" + echo "--------------------------------------------------------------" + + sg_common $ns1 $grp $src $vtep_ip $all_zeros_grp +} + +sg_ipv6_ipv4() +{ + local ns1=ns1_v4 + local grp=ff0e::1 + local src=2001:db8:100::1 + local vtep_ip=198.51.100.100 + local all_zeros_grp=:: + + echo + echo "Control path: (S, G) operations - IPv6 overlay / IPv4 underlay" + echo "--------------------------------------------------------------" + + sg_common $ns1 $grp $src $vtep_ip $all_zeros_grp +} + +sg_ipv4_ipv6() +{ + local ns1=ns1_v6 + local grp=239.1.1.1 + local src=192.0.2.129 + local vtep_ip=2001:db8:1000::1 + local all_zeros_grp=0.0.0.0 + + echo + echo "Control path: (S, G) operations - IPv4 overlay / IPv6 underlay" + echo "--------------------------------------------------------------" + + sg_common $ns1 $grp $src $vtep_ip $all_zeros_grp +} + +sg_ipv6_ipv6() +{ + local ns1=ns1_v6 + local grp=ff0e::1 + local src=2001:db8:100::1 + local vtep_ip=2001:db8:1000::1 + local all_zeros_grp=:: + + echo + echo "Control path: (S, G) operations - IPv6 overlay / IPv6 underlay" + echo "--------------------------------------------------------------" + + sg_common $ns1 $grp $src $vtep_ip $all_zeros_grp +} + +ipv4_grps_get() +{ + local max_grps=$1; shift + local i + + for i in $(seq 0 $((max_grps - 1))); do + echo "239.1.1.$i" + done +} + +ipv6_grps_get() +{ + local max_grps=$1; shift + local i + + for i in $(seq 0 $((max_grps - 1))); do + echo "ff0e::$(printf %x $i)" + done +} + +dump_common() +{ + local ns1=$1; shift + local local_addr=$1; shift + local remote_prefix=$1; shift + local fn=$1; shift + local max_vxlan_devs=2 + local max_remotes=64 + local max_grps=256 + local num_entries + local batch_file + local grp + local i j + + # The kernel maintains various markers for the MDB dump. Add a test for + # large scale MDB dump to make sure that all the configured entries are + # dumped and that the markers are used correctly. + + # Create net devices. + for i in $(seq 1 $max_vxlan_devs); do + ip -n $ns1 link add name vx-test${i} up type vxlan \ + local $local_addr dstport 4789 external vnifilter + done + + # Create batch file with MDB entries. + batch_file=$(mktemp) + for i in $(seq 1 $max_vxlan_devs); do + for j in $(seq 1 $max_remotes); do + for grp in $($fn $max_grps); do + echo "mdb add dev vx-test${i} port vx-test${i} grp $grp permanent dst ${remote_prefix}${j}" >> $batch_file + done + done + done + + # Program the batch file and check for expected number of entries. + bridge -n $ns1 -b $batch_file + for i in $(seq 1 $max_vxlan_devs); do + num_entries=$(bridge -n $ns1 mdb show dev vx-test${i} | grep "permanent" | wc -l) + [[ $num_entries -eq $((max_grps * max_remotes)) ]] + log_test $? 0 "Large scale dump - VXLAN device #$i" + done + + rm -rf $batch_file +} + +dump_ipv4_ipv4() +{ + local ns1=ns1_v4 + local local_addr=192.0.2.1 + local remote_prefix=198.51.100. + local fn=ipv4_grps_get + + echo + echo "Control path: Large scale MDB dump - IPv4 overlay / IPv4 underlay" + echo "-----------------------------------------------------------------" + + dump_common $ns1 $local_addr $remote_prefix $fn +} + +dump_ipv6_ipv4() +{ + local ns1=ns1_v4 + local local_addr=192.0.2.1 + local remote_prefix=198.51.100. 
+ local fn=ipv6_grps_get + + echo + echo "Control path: Large scale MDB dump - IPv6 overlay / IPv4 underlay" + echo "-----------------------------------------------------------------" + + dump_common $ns1 $local_addr $remote_prefix $fn +} + +dump_ipv4_ipv6() +{ + local ns1=ns1_v6 + local local_addr=2001:db8:1::1 + local remote_prefix=2001:db8:1000:: + local fn=ipv4_grps_get + + echo + echo "Control path: Large scale MDB dump - IPv4 overlay / IPv6 underlay" + echo "-----------------------------------------------------------------" + + dump_common $ns1 $local_addr $remote_prefix $fn +} + +dump_ipv6_ipv6() +{ + local ns1=ns1_v6 + local local_addr=2001:db8:1::1 + local remote_prefix=2001:db8:1000:: + local fn=ipv6_grps_get + + echo + echo "Control path: Large scale MDB dump - IPv6 overlay / IPv6 underlay" + echo "-----------------------------------------------------------------" + + dump_common $ns1 $local_addr $remote_prefix $fn +} + +################################################################################ +# Tests - Data path + +encap_params_common() +{ + local ns1=$1; shift + local ns2=$1; shift + local vtep1_ip=$1; shift + local vtep2_ip=$1; shift + local plen=$1; shift + local enc_ethtype=$1; shift + local grp=$1; shift + local src=$1; shift + local mz=$1; shift + + # Test that packets forwarded by the VXLAN MDB are encapsulated with + # the correct parameters. Transmit packets from the first namespace and + # check that they hit the corresponding filters on the ingress of the + # second namespace. + + run_cmd "tc -n $ns2 qdisc replace dev veth0 clsact" + run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact" + run_cmd "ip -n $ns2 address replace $vtep1_ip/$plen dev lo" + run_cmd "ip -n $ns2 address replace $vtep2_ip/$plen dev lo" + + # Check destination IP. + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep2_ip src_vni 10020" + + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass" + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Destination IP - match" + + run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Destination IP - no match" + + run_cmd "tc -n $ns2 filter del dev vx0 ingress pref 1 handle 101 flower" + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10020" + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010" + + # Check destination port. + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip dst_port 1111 src_vni 10020" + + run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 4789 action pass" + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev veth0 ingress" 101 1 + log_test $? 0 "Default destination port - match" + + run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev veth0 ingress" 101 1 + log_test $? 
0 "Default destination port - no match" + + run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 1111 action pass" + run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev veth0 ingress" 101 1 + log_test $? 0 "Non-default destination port - match" + + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev veth0 ingress" 101 1 + log_test $? 0 "Non-default destination port - no match" + + run_cmd "tc -n $ns2 filter del dev veth0 ingress pref 1 handle 101 flower" + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10020" + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010" + + # Check default VNI. + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10020" + + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10010 action pass" + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Default destination VNI - match" + + run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Default destination VNI - no match" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip vni 10020 src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip vni 10010 src_vni 10020" + + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10020 action pass" + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Non-default destination VNI - match" + + run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 
0 "Non-default destination VNI - no match" + + run_cmd "tc -n $ns2 filter del dev vx0 ingress pref 1 handle 101 flower" + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10020" + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010" +} + +encap_params_ipv4_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local plen=32 + local enc_ethtype="ip" + local grp=239.1.1.1 + local src=192.0.2.129 + + echo + echo "Data path: Encapsulation parameters - IPv4 overlay / IPv4 underlay" + echo "------------------------------------------------------------------" + + encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ + $grp $src "mausezahn" +} + +encap_params_ipv6_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local plen=32 + local enc_ethtype="ip" + local grp=ff0e::1 + local src=2001:db8:100::1 + + echo + echo "Data path: Encapsulation parameters - IPv6 overlay / IPv4 underlay" + echo "------------------------------------------------------------------" + + encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ + $grp $src "mausezahn -6" +} + +encap_params_ipv4_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local enc_ethtype="ipv6" + local grp=239.1.1.1 + local src=192.0.2.129 + + echo + echo "Data path: Encapsulation parameters - IPv4 overlay / IPv6 underlay" + echo "------------------------------------------------------------------" + + encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ + $grp $src "mausezahn" +} + +encap_params_ipv6_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local enc_ethtype="ipv6" + local grp=ff0e::1 + local src=2001:db8:100::1 + + echo + echo "Data path: Encapsulation parameters - IPv6 overlay / IPv6 underlay" + echo "------------------------------------------------------------------" + + encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ + $grp $src "mausezahn -6" +} + +starg_exclude_ir_common() +{ + local ns1=$1; shift + local ns2=$1; shift + local vtep1_ip=$1; shift + local vtep2_ip=$1; shift + local plen=$1; shift + local grp=$1; shift + local valid_src=$1; shift + local invalid_src=$1; shift + local mz=$1; shift + + # Install a (*, G) EXCLUDE MDB entry with one source and two remote + # VTEPs. Make sure that the source in the source list is not forwarded + # and that a source not in the list is forwarded. Remove one of the + # VTEPs from the entry and make sure that packets are only forwarded to + # the remaining VTEP. 
+ + run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact" + run_cmd "ip -n $ns2 address replace $vtep1_ip/$plen dev lo" + run_cmd "ip -n $ns2 address replace $vtep2_ip/$plen dev lo" + + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass" + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 102 proto all flower enc_dst_ip $vtep2_ip action pass" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $vtep2_ip src_vni 10010" + + # Check that invalid source is not forwarded to any VTEP. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 0 + log_test $? 0 "Block excluded source - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 0 + log_test $? 0 "Block excluded source - second VTEP" + + # Check that valid source is forwarded to both VTEPs. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Forward valid source - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Forward valid source - second VTEP" + + # Remove second VTEP. + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010" + + # Check that invalid source is not forwarded to any VTEP. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Block excluded source after removal - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Block excluded source after removal - second VTEP" + + # Check that valid source is forwarded to the remaining VTEP. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 2 + log_test $? 0 "Forward valid source after removal - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 
0 "Forward valid source after removal - second VTEP" +} + +starg_exclude_ir_ipv4_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local plen=32 + local grp=239.1.1.1 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + + echo + echo "Data path: (*, G) EXCLUDE - IR - IPv4 overlay / IPv4 underlay" + echo "-------------------------------------------------------------" + + starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ + $valid_src $invalid_src "mausezahn" +} + +starg_exclude_ir_ipv6_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local plen=32 + local grp=ff0e::1 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + + echo + echo "Data path: (*, G) EXCLUDE - IR - IPv6 overlay / IPv4 underlay" + echo "-------------------------------------------------------------" + + starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ + $valid_src $invalid_src "mausezahn -6" +} + +starg_exclude_ir_ipv4_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local grp=239.1.1.1 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + + echo + echo "Data path: (*, G) EXCLUDE - IR - IPv4 overlay / IPv6 underlay" + echo "-------------------------------------------------------------" + + starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ + $valid_src $invalid_src "mausezahn" +} + +starg_exclude_ir_ipv6_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local grp=ff0e::1 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + + echo + echo "Data path: (*, G) EXCLUDE - IR - IPv6 overlay / IPv6 underlay" + echo "-------------------------------------------------------------" + + starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ + $valid_src $invalid_src "mausezahn -6" +} + +starg_include_ir_common() +{ + local ns1=$1; shift + local ns2=$1; shift + local vtep1_ip=$1; shift + local vtep2_ip=$1; shift + local plen=$1; shift + local grp=$1; shift + local valid_src=$1; shift + local invalid_src=$1; shift + local mz=$1; shift + + # Install a (*, G) INCLUDE MDB entry with one source and two remote + # VTEPs. Make sure that the source in the source list is forwarded and + # that a source not in the list is not forwarded. Remove one of the + # VTEPs from the entry and make sure that packets are only forwarded to + # the remaining VTEP. + + run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact" + run_cmd "ip -n $ns2 address replace $vtep1_ip/$plen dev lo" + run_cmd "ip -n $ns2 address replace $vtep2_ip/$plen dev lo" + + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass" + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 102 proto all flower enc_dst_ip $vtep2_ip action pass" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $vtep2_ip src_vni 10010" + + # Check that invalid source is not forwarded to any VTEP. 
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 0 + log_test $? 0 "Block excluded source - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 0 + log_test $? 0 "Block excluded source - second VTEP" + + # Check that valid source is forwarded to both VTEPs. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Forward valid source - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Forward valid source - second VTEP" + + # Remove second VTEP. + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010" + + # Check that invalid source is not forwarded to any VTEP. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Block excluded source after removal - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Block excluded source after removal - second VTEP" + + # Check that valid source is forwarded to the remaining VTEP. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 2 + log_test $? 0 "Forward valid source after removal - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Forward valid source after removal - second VTEP" +} + +starg_include_ir_ipv4_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local plen=32 + local grp=239.1.1.1 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + + echo + echo "Data path: (*, G) INCLUDE - IR - IPv4 overlay / IPv4 underlay" + echo "-------------------------------------------------------------" + + starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ + $valid_src $invalid_src "mausezahn" +} + +starg_include_ir_ipv6_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local plen=32 + local grp=ff0e::1 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + + echo + echo "Data path: (*, G) INCLUDE - IR - IPv6 overlay / IPv4 underlay" + echo "-------------------------------------------------------------" + + starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ + $valid_src $invalid_src "mausezahn -6" +} + +starg_include_ir_ipv4_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local grp=239.1.1.1 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + + echo + echo "Data path: (*, G) INCLUDE - IR - IPv4 overlay / IPv6 underlay" + echo "-------------------------------------------------------------" + + starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ + $valid_src $invalid_src "mausezahn" +} + +starg_include_ir_ipv6_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local grp=ff0e::1 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + + echo + echo "Data path: (*, G) INCLUDE - IR - IPv6 overlay / IPv6 underlay" + echo "-------------------------------------------------------------" + + starg_include_ir_common $ns1 $ns2 
$vtep1_ip $vtep2_ip $plen $grp \ + $valid_src $invalid_src "mausezahn -6" +} + +starg_exclude_p2mp_common() +{ + local ns1=$1; shift + local ns2=$1; shift + local mcast_grp=$1; shift + local plen=$1; shift + local grp=$1; shift + local valid_src=$1; shift + local invalid_src=$1; shift + local mz=$1; shift + + # Install a (*, G) EXCLUDE MDB entry with one source and one multicast + # group to which packets are sent. Make sure that the source in the + # source list is not forwarded and that a source not in the list is + # forwarded. + + run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact" + run_cmd "ip -n $ns2 address replace $mcast_grp/$plen dev veth0 autojoin" + + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $mcast_grp action pass" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $mcast_grp src_vni 10010 via veth0" + + # Check that invalid source is not forwarded. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 0 + log_test $? 0 "Block excluded source" + + # Check that valid source is forwarded. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Forward valid source" + + # Remove the VTEP from the multicast group. + run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0" + + # Check that valid source is not received anymore. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Receive of valid source after removal from group" +} + +starg_exclude_p2mp_ipv4_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local mcast_grp=238.1.1.1 + local plen=32 + local grp=239.1.1.1 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + + echo + echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv4 underlay" + echo "---------------------------------------------------------------" + + starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + $valid_src $invalid_src "mausezahn" +} + +starg_exclude_p2mp_ipv6_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local mcast_grp=238.1.1.1 + local plen=32 + local grp=ff0e::1 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + + echo + echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv4 underlay" + echo "---------------------------------------------------------------" + + starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + $valid_src $invalid_src "mausezahn -6" +} + +starg_exclude_p2mp_ipv4_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local mcast_grp=ff0e::2 + local plen=128 + local grp=239.1.1.1 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + + echo + echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv6 underlay" + echo "---------------------------------------------------------------" + + starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + $valid_src $invalid_src "mausezahn" +} + +starg_exclude_p2mp_ipv6_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local mcast_grp=ff0e::2 + local plen=128 + local grp=ff0e::1 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + + echo + echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv6 underlay" + echo 
"---------------------------------------------------------------" + + starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + $valid_src $invalid_src "mausezahn -6" +} + +starg_include_p2mp_common() +{ + local ns1=$1; shift + local ns2=$1; shift + local mcast_grp=$1; shift + local plen=$1; shift + local grp=$1; shift + local valid_src=$1; shift + local invalid_src=$1; shift + local mz=$1; shift + + # Install a (*, G) INCLUDE MDB entry with one source and one multicast + # group to which packets are sent. Make sure that the source in the + # source list is forwarded and that a source not in the list is not + # forwarded. + + run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact" + run_cmd "ip -n $ns2 address replace $mcast_grp/$plen dev veth0 autojoin" + + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $mcast_grp action pass" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $mcast_grp src_vni 10010 via veth0" + + # Check that invalid source is not forwarded. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 0 + log_test $? 0 "Block excluded source" + + # Check that valid source is forwarded. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Forward valid source" + + # Remove the VTEP from the multicast group. + run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0" + + # Check that valid source is not received anymore. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 
0 "Receive of valid source after removal from group" +} + +starg_include_p2mp_ipv4_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local mcast_grp=238.1.1.1 + local plen=32 + local grp=239.1.1.1 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + + echo + echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv4 underlay" + echo "---------------------------------------------------------------" + + starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + $valid_src $invalid_src "mausezahn" +} + +starg_include_p2mp_ipv6_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local mcast_grp=238.1.1.1 + local plen=32 + local grp=ff0e::1 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + + echo + echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv4 underlay" + echo "---------------------------------------------------------------" + + starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + $valid_src $invalid_src "mausezahn -6" +} + +starg_include_p2mp_ipv4_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local mcast_grp=ff0e::2 + local plen=128 + local grp=239.1.1.1 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + + echo + echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv6 underlay" + echo "---------------------------------------------------------------" + + starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + $valid_src $invalid_src "mausezahn" +} + +starg_include_p2mp_ipv6_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local mcast_grp=ff0e::2 + local plen=128 + local grp=ff0e::1 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + + echo + echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv6 underlay" + echo "---------------------------------------------------------------" + + starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + $valid_src $invalid_src "mausezahn -6" +} + +egress_vni_translation_common() +{ + local ns1=$1; shift + local ns2=$1; shift + local mcast_grp=$1; shift + local plen=$1; shift + local proto=$1; shift + local grp=$1; shift + local src=$1; shift + local mz=$1; shift + + # When P2MP tunnels are used with optimized inter-subnet multicast + # (OISM) [1], the ingress VTEP does not perform VNI translation and + # uses the VNI of the source broadcast domain (BD). If the egress VTEP + # is a member in the source BD, then no VNI translation is needed. + # Otherwise, the egress VTEP needs to translate the VNI to the + # supplementary broadcast domain (SBD) VNI, which is usually the L3VNI. + # + # In this test, remove the VTEP in the second namespace from VLAN 10 + # (VNI 10010) and make sure that a packet sent from this VLAN on the + # first VTEP is received by the SVI corresponding to the L3VNI (14000 / + # VLAN 4000) on the second VTEP. + # + # The second VTEP will be able to decapsulate the packet with VNI 10010 + # because this VNI is configured on its shared VXLAN device. Later, + # when ingressing the bridge, the VNI to VLAN lookup will fail because + # the VTEP is not a member in VLAN 10, which will cause the packet to + # be tagged with VLAN 4000 since it is configured as PVID. 
+ # + # [1] https://datatracker.ietf.org/doc/html/draft-ietf-bess-evpn-irb-mcast + + run_cmd "tc -n $ns2 qdisc replace dev br0.4000 clsact" + run_cmd "ip -n $ns2 address replace $mcast_grp/$plen dev veth0 autojoin" + run_cmd "tc -n $ns2 filter replace dev br0.4000 ingress pref 1 handle 101 proto $proto flower src_ip $src dst_ip $grp action pass" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp src $src permanent dst $mcast_grp src_vni 10010 via veth0" + + # Remove the second VTEP from VLAN 10. + run_cmd "bridge -n $ns2 vlan del vid 10 dev vx0" + + # Make sure that packets sent from the first VTEP over VLAN 10 are + # received by the SVI corresponding to the L3VNI (14000 / VLAN 4000) on + # the second VTEP, since it is configured as PVID. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1 + log_test $? 0 "Egress VNI translation - PVID configured" + + # Remove PVID flag from VLAN 4000 on the second VTEP and make sure + # packets are no longer received by the SVI interface. + run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0" + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1 + log_test $? 0 "Egress VNI translation - no PVID configured" + + # Reconfigure the PVID and make sure packets are received again. + run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0 pvid" + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev br0.4000 ingress" 101 2 + log_test $? 0 "Egress VNI translation - PVID reconfigured" +} + +egress_vni_translation_ipv4_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local mcast_grp=238.1.1.1 + local plen=32 + local proto="ipv4" + local grp=239.1.1.1 + local src=192.0.2.129 + + echo + echo "Data path: Egress VNI translation - IPv4 overlay / IPv4 underlay" + echo "----------------------------------------------------------------" + + egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ + $src "mausezahn" +} + +egress_vni_translation_ipv6_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local mcast_grp=238.1.1.1 + local plen=32 + local proto="ipv6" + local grp=ff0e::1 + local src=2001:db8:100::1 + + echo + echo "Data path: Egress VNI translation - IPv6 overlay / IPv4 underlay" + echo "----------------------------------------------------------------" + + egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ + $src "mausezahn -6" +} + +egress_vni_translation_ipv4_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local mcast_grp=ff0e::2 + local plen=128 + local proto="ipv4" + local grp=239.1.1.1 + local src=192.0.2.129 + + echo + echo "Data path: Egress VNI translation - IPv4 overlay / IPv6 underlay" + echo "----------------------------------------------------------------" + + egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ + $src "mausezahn" +} + +egress_vni_translation_ipv6_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local mcast_grp=ff0e::2 + local plen=128 + local proto="ipv6" + local grp=ff0e::1 + local src=2001:db8:100::1 + + echo + echo "Data path: Egress VNI translation - IPv6 overlay / IPv6 underlay" + echo "----------------------------------------------------------------" + + egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ + $src "mausezahn -6" +} + +all_zeros_mdb_common() +{ + local ns1=$1; shift + 
local ns2=$1; shift + local vtep1_ip=$1; shift + local vtep2_ip=$1; shift + local vtep3_ip=$1; shift + local vtep4_ip=$1; shift + local plen=$1; shift + local ipv4_grp=239.1.1.1 + local ipv4_unreg_grp=239.2.2.2 + local ipv4_ll_grp=224.0.0.100 + local ipv4_src=192.0.2.129 + local ipv6_grp=ff0e::1 + local ipv6_unreg_grp=ff0e::2 + local ipv6_ll_grp=ff02::1 + local ipv6_src=2001:db8:100::1 + + # Install all-zeros (catchall) MDB entries for IPv4 and IPv6 traffic + # and make sure they only forward unregistered IP multicast traffic + # which is not link-local. Also make sure that each entry only forwards + # traffic from the matching address family. + + # Associate two different VTEPs with one all-zeros MDB entry: Two with + # the IPv4 entry (0.0.0.0) and another two with the IPv6 one (::). + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp 0.0.0.0 permanent dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp 0.0.0.0 permanent dst $vtep2_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp :: permanent dst $vtep3_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp :: permanent dst $vtep4_ip src_vni 10010" + + # Associate one VTEP from each set with a regular MDB entry: One with + # an IPv4 entry and another with an IPv6 one. + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $ipv4_grp permanent dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $ipv6_grp permanent dst $vtep3_ip src_vni 10010" + + # Add filters to match on decapsulated traffic in the second namespace. + run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact" + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass" + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 102 proto all flower enc_dst_ip $vtep2_ip action pass" + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 103 proto all flower enc_dst_ip $vtep3_ip action pass" + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 104 proto all flower enc_dst_ip $vtep4_ip action pass" + + # Configure the VTEP addresses in the second namespace to enable + # decapsulation. + run_cmd "ip -n $ns2 address replace $vtep1_ip/$plen dev lo" + run_cmd "ip -n $ns2 address replace $vtep2_ip/$plen dev lo" + run_cmd "ip -n $ns2 address replace $vtep3_ip/$plen dev lo" + run_cmd "ip -n $ns2 address replace $vtep4_ip/$plen dev lo" + + # Send registered IPv4 multicast and make sure it only arrives to the + # first VTEP. + run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Registered IPv4 multicast - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 0 + log_test $? 0 "Registered IPv4 multicast - second VTEP" + + # Send unregistered IPv4 multicast that is not link-local and make sure + # it arrives to the first and second VTEPs. + run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 2 + log_test $? 0 "Unregistered IPv4 multicast - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Unregistered IPv4 multicast - second VTEP" + + # Send IPv4 link-local multicast traffic and make sure it does not + # arrive to any VTEP. 
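+ # (Editor's note, comment added for clarity: the all-zeros entries are
+ # only expected to match unregistered, non-link-local groups, so
+ # $ipv4_ll_grp should not be forwarded and the counters below should
+ # keep their previous values.)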
+ run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 2 + log_test $? 0 "Link-local IPv4 multicast - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Link-local IPv4 multicast - second VTEP" + + # Send registered IPv4 multicast using a unicast MAC address and make + # sure it does not arrive to any VTEP. + run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b 00:11:22:33:44:55 -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 2 + log_test $? 0 "Registered IPv4 multicast with a unicast MAC - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Registered IPv4 multicast with a unicast MAC - second VTEP" + + # Send registered IPv4 multicast using a broadcast MAC address and make + # sure it does not arrive to any VTEP. + run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b bcast -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 2 + log_test $? 0 "Registered IPv4 multicast with a broadcast MAC - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Registered IPv4 multicast with a broadcast MAC - second VTEP" + + # Make sure IPv4 traffic did not reach the VTEPs associated with + # IPv6 entries. + tc_check_packets "$ns2" "dev vx0 ingress" 103 0 + log_test $? 0 "IPv4 traffic - third VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 104 0 + log_test $? 0 "IPv4 traffic - fourth VTEP" + + # Reset IPv4 filters before testing IPv6 traffic. + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass" + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 102 proto all flower enc_dst_ip $vtep2_ip action pass" + + # Send registered IPv6 multicast and make sure it only arrives to the + # third VTEP. + run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 103 1 + log_test $? 0 "Registered IPv6 multicast - third VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 104 0 + log_test $? 0 "Registered IPv6 multicast - fourth VTEP" + + # Send unregistered IPv6 multicast that is not link-local and make sure + # it arrives to the third and fourth VTEPs. + run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 103 2 + log_test $? 0 "Unregistered IPv6 multicast - third VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 104 1 + log_test $? 0 "Unregistered IPv6 multicast - fourth VTEP" + + # Send IPv6 link-local multicast traffic and make sure it does not + # arrive to any VTEP. + run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 103 2 + log_test $? 0 "Link-local IPv6 multicast - third VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 104 1 + log_test $? 0 "Link-local IPv6 multicast - fourth VTEP" + + # Send registered IPv6 multicast using a unicast MAC address and make + # sure it does not arrive to any VTEP. 
+ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b 00:11:22:33:44:55 -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 103 2 + log_test $? 0 "Registered IPv6 multicast with a unicast MAC - third VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 104 1 + log_test $? 0 "Registered IPv6 multicast with a unicast MAC - fourth VTEP" + + # Send registered IPv6 multicast using a broadcast MAC address and make + # sure it does not arrive to any VTEP. + run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b bcast -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 103 2 + log_test $? 0 "Registered IPv6 multicast with a broadcast MAC - third VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 104 1 + log_test $? 0 "Registered IPv6 multicast with a broadcast MAC - fourth VTEP" + + # Make sure IPv6 traffic did not reach the VTEPs associated with + # IPv4 entries. + tc_check_packets "$ns2" "dev vx0 ingress" 101 0 + log_test $? 0 "IPv6 traffic - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 0 + log_test $? 0 "IPv6 traffic - second VTEP" +} + +all_zeros_mdb_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local vtep1_ip=198.51.100.101 + local vtep2_ip=198.51.100.102 + local vtep3_ip=198.51.100.103 + local vtep4_ip=198.51.100.104 + local plen=32 + + echo + echo "Data path: All-zeros MDB entry - IPv4 underlay" + echo "----------------------------------------------" + + all_zeros_mdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $vtep3_ip \ + $vtep4_ip $plen +} + +all_zeros_mdb_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local vtep3_ip=2001:db8:3000::1 + local vtep4_ip=2001:db8:4000::1 + local plen=128 + + echo + echo "Data path: All-zeros MDB entry - IPv6 underlay" + echo "----------------------------------------------" + + all_zeros_mdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $vtep3_ip \ + $vtep4_ip $plen +} + +mdb_fdb_common() +{ + local ns1=$1; shift + local ns2=$1; shift + local vtep1_ip=$1; shift + local vtep2_ip=$1; shift + local plen=$1; shift + local proto=$1; shift + local grp=$1; shift + local src=$1; shift + local mz=$1; shift + + # Install an MDB entry and an FDB entry and make sure that the FDB + # entry only forwards traffic that was not forwarded by the MDB. + + # Associate the MDB entry with one VTEP and the FDB entry with another + # VTEP. + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 fdb add 00:00:00:00:00:00 dev vx0 self static dst $vtep2_ip src_vni 10010" + + # Add filters to match on decapsulated traffic in the second namespace. + run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact" + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto $proto flower ip_proto udp dst_port 54321 enc_dst_ip $vtep1_ip action pass" + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 102 proto $proto flower ip_proto udp dst_port 54321 enc_dst_ip $vtep2_ip action pass" + + # Configure the VTEP addresses in the second namespace to enable + # decapsulation. + run_cmd "ip -n $ns2 address replace $vtep1_ip/$plen dev lo" + run_cmd "ip -n $ns2 address replace $vtep2_ip/$plen dev lo" + + # Send IP multicast traffic and make sure it is forwarded by the MDB + # and only arrives to the first VTEP. 
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "IP multicast - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 0 + log_test $? 0 "IP multicast - second VTEP" + + # Send broadcast traffic and make sure it is forwarded by the FDB and + # only arrives to the second VTEP. + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b bcast -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Broadcast - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Broadcast - second VTEP" + + # Remove the MDB entry and make sure that IP multicast is now forwarded + # by the FDB to the second VTEP. + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010" + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "IP multicast after removal - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 2 + log_test $? 0 "IP multicast after removal - second VTEP" +} + +mdb_fdb_ipv4_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local plen=32 + local proto="ipv4" + local grp=239.1.1.1 + local src=192.0.2.129 + + echo + echo "Data path: MDB with FDB - IPv4 overlay / IPv4 underlay" + echo "------------------------------------------------------" + + mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ + "mausezahn" +} + +mdb_fdb_ipv6_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local plen=32 + local proto="ipv6" + local grp=ff0e::1 + local src=2001:db8:100::1 + + echo + echo "Data path: MDB with FDB - IPv6 overlay / IPv4 underlay" + echo "------------------------------------------------------" + + mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ + "mausezahn -6" +} + +mdb_fdb_ipv4_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local proto="ipv4" + local grp=239.1.1.1 + local src=192.0.2.129 + + echo + echo "Data path: MDB with FDB - IPv4 overlay / IPv6 underlay" + echo "------------------------------------------------------" + + mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ + "mausezahn" +} + +mdb_fdb_ipv6_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local proto="ipv6" + local grp=ff0e::1 + local src=2001:db8:100::1 + + echo + echo "Data path: MDB with FDB - IPv6 overlay / IPv6 underlay" + echo "------------------------------------------------------" + + mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ + "mausezahn -6" +} + +mdb_grp1_loop() +{ + local ns1=$1; shift + local vtep1_ip=$1; shift + local grp1=$1; shift + + while true; do + bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp1 dst $vtep1_ip src_vni 10010 + bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp1 permanent dst $vtep1_ip src_vni 10010 + done >/dev/null 2>&1 +} + +mdb_grp2_loop() +{ + local ns1=$1; shift + local vtep1_ip=$1; shift + local vtep2_ip=$1; shift + local grp2=$1; shift + + while true; do + bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp2 dst $vtep1_ip src_vni 10010 + bridge -n $ns1 mdb add dev vx0 
port vx0 grp $grp2 permanent dst $vtep1_ip src_vni 10010 + bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp2 permanent dst $vtep2_ip src_vni 10010 + done >/dev/null 2>&1 +} + +mdb_torture_common() +{ + local ns1=$1; shift + local vtep1_ip=$1; shift + local vtep2_ip=$1; shift + local grp1=$1; shift + local grp2=$1; shift + local src=$1; shift + local mz=$1; shift + local pid1 + local pid2 + local pid3 + local pid4 + + # Continuously send two streams that are forwarded by two different MDB + # entries. The first entry will be added and deleted in a loop. This + # allows us to test that the data path does not use freed MDB entry + # memory. The second entry will have two remotes, one that is added and + # deleted in a loop and another that is replaced in a loop. This allows + # us to test that the data path does not use freed remote entry memory. + # The test is considered successful if nothing crashed. + + # Create the MDB entries that will be continuously deleted / replaced. + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp1 permanent dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp2 permanent dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp2 permanent dst $vtep2_ip src_vni 10010" + + mdb_grp1_loop $ns1 $vtep1_ip $grp1 & + pid1=$! + mdb_grp2_loop $ns1 $vtep1_ip $vtep2_ip $grp2 & + pid2=$! + ip netns exec $ns1 $mz br0.10 -A $src -B $grp1 -t udp sp=12345,dp=54321 -p 100 -c 0 -q & + pid3=$! + ip netns exec $ns1 $mz br0.10 -A $src -B $grp2 -t udp sp=12345,dp=54321 -p 100 -c 0 -q & + pid4=$! + + sleep 30 + kill -9 $pid1 $pid2 $pid3 $pid4 + wait $pid1 $pid2 $pid3 $pid4 2>/dev/null + + log_test 0 0 "Torture test" +} + +mdb_torture_ipv4_ipv4() +{ + local ns1=ns1_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local grp1=239.1.1.1 + local grp2=239.2.2.2 + local src=192.0.2.129 + + echo + echo "Data path: MDB torture test - IPv4 overlay / IPv4 underlay" + echo "----------------------------------------------------------" + + mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ + "mausezahn" +} + +mdb_torture_ipv6_ipv4() +{ + local ns1=ns1_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local grp1=ff0e::1 + local grp2=ff0e::2 + local src=2001:db8:100::1 + + echo + echo "Data path: MDB torture test - IPv6 overlay / IPv4 underlay" + echo "----------------------------------------------------------" + + mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ + "mausezahn -6" +} + +mdb_torture_ipv4_ipv6() +{ + local ns1=ns1_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local grp1=239.1.1.1 + local grp2=239.2.2.2 + local src=192.0.2.129 + + echo + echo "Data path: MDB torture test - IPv4 overlay / IPv6 underlay" + echo "----------------------------------------------------------" + + mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ + "mausezahn" +} + +mdb_torture_ipv6_ipv6() +{ + local ns1=ns1_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local grp1=ff0e::1 + local grp2=ff0e::2 + local src=2001:db8:100::1 + + echo + echo "Data path: MDB torture test - IPv6 overlay / IPv6 underlay" + echo "----------------------------------------------------------" + + mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ + "mausezahn -6" +} + +################################################################################ +# Usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -t <test> Test(s) to run (default: all) + 
(options: $TESTS) + -c Control path tests only + -d Data path tests only + -p Pause on fail + -P Pause after each test before cleanup + -v Verbose mode (show commands and output) +EOF +} + +################################################################################ +# Main + +trap cleanup EXIT + +while getopts ":t:cdpPvh" opt; do + case $opt in + t) TESTS=$OPTARG;; + c) TESTS=${CONTROL_PATH_TESTS};; + d) TESTS=${DATA_PATH_TESTS};; + p) PAUSE_ON_FAIL=yes;; + P) PAUSE=yes;; + v) VERBOSE=$(($VERBOSE + 1));; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +# Make sure we don't pause twice. +[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip; +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v bridge)" ]; then + echo "SKIP: Could not run test without bridge tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v mausezahn)" ]; then + echo "SKIP: Could not run test without mausezahn tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v jq)" ]; then + echo "SKIP: Could not run test without jq tool" + exit $ksft_skip +fi + +bridge mdb help 2>&1 | grep -q "src_vni" +if [ $? -ne 0 ]; then + echo "SKIP: iproute2 bridge too old, missing VXLAN MDB support" + exit $ksft_skip +fi + +# Start clean. +cleanup + +for t in $TESTS +do + setup; $t; cleanup; +done + +if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} +fi + +exit $ret -- GitLab From ef63461caf427a77a04620d74ba90035a712af9c Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 15 Mar 2023 14:46:43 +0000 Subject: [PATCH 0536/2078] net: pcs: xpcs: remove double-read of link state when using AN Phylink does not want the current state of the link when reading the PCS link state - it wants the latched state. Don't double-read the MII status register. Phylink will re-read as necessary to capture transient link-down events as of dbae3388ea9c ("net: phylink: Force retrigger in case of latched link-fail indicator"). The above referenced commit is a dependency for this change, and thus this change should not be backported to any kernel that does not contain the above referenced commit. Fixes: fcb26bd2b6ca ("net: phy: Add Synopsys DesignWare XPCS MDIO module") Signed-off-by: Russell King (Oracle) Signed-off-by: David S. Miller --- drivers/net/pcs/pcs-xpcs.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index bc428a816719d..04a6853530418 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -321,7 +321,7 @@ static int xpcs_read_fault_c73(struct dw_xpcs *xpcs, return 0; } -static int xpcs_read_link_c73(struct dw_xpcs *xpcs, bool an) +static int xpcs_read_link_c73(struct dw_xpcs *xpcs) { bool link = true; int ret; @@ -333,15 +333,6 @@ static int xpcs_read_link_c73(struct dw_xpcs *xpcs, bool an) if (!(ret & MDIO_STAT1_LSTATUS)) link = false; - if (an) { - ret = xpcs_read(xpcs, MDIO_MMD_AN, MDIO_STAT1); - if (ret < 0) - return ret; - - if (!(ret & MDIO_STAT1_LSTATUS)) - link = false; - } - return link; } @@ -935,7 +926,7 @@ static int xpcs_get_state_c73(struct dw_xpcs *xpcs, int ret; /* Link needs to be read first ... */ - state->link = xpcs_read_link_c73(xpcs, state->an_enabled) > 0 ? 1 : 0; + state->link = xpcs_read_link_c73(xpcs) > 0 ? 1 : 0; /* ... and then we check the faults. 
*/ ret = xpcs_read_fault_c73(xpcs, state); -- GitLab From ecec0ebbc6381a5a375f1cf10c4858f24e91e2ef Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 15 Mar 2023 14:46:49 +0000 Subject: [PATCH 0537/2078] net: pcs: lynx: don't print an_enabled in pcs_get_state() an_enabled will be going away, and in any case, pcs_get_state() should not be updating this member. Remove the print. Signed-off-by: Russell King (Oracle) Reviewed-by: Steen Hegelund Signed-off-by: David S. Miller --- drivers/net/pcs/pcs-lynx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/pcs/pcs-lynx.c b/drivers/net/pcs/pcs-lynx.c index 3903f3baba2b9..622c3de3f3a8b 100644 --- a/drivers/net/pcs/pcs-lynx.c +++ b/drivers/net/pcs/pcs-lynx.c @@ -112,11 +112,11 @@ static void lynx_pcs_get_state(struct phylink_pcs *pcs, } dev_dbg(&lynx->mdio->dev, - "mode=%s/%s/%s link=%u an_enabled=%u an_complete=%u\n", + "mode=%s/%s/%s link=%u an_complete=%u\n", phy_modes(state->interface), phy_speed_to_str(state->speed), phy_duplex_to_str(state->duplex), - state->link, state->an_enabled, state->an_complete); + state->link, state->an_complete); } static int lynx_pcs_config_giga(struct mdio_device *pcs, unsigned int mode, -- GitLab From 731b73dba359e3ff00517c13aa0daa82b34ff466 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Wed, 15 Mar 2023 08:33:02 -0700 Subject: [PATCH 0538/2078] vlan: partially enable SIOCSHWTSTAMP in container Setting timestamp filter was explicitly disabled on vlan devices in containers because it might affect other processes on the host. But it's absolutely legit in case when real device is in the same namespace. Fixes: 873017af7784 ("vlan: disable SIOCSHWTSTAMP in container") Signed-off-by: Vadim Fedorenko Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 296d0145932f4..5920544e93e82 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -365,7 +365,7 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) switch (cmd) { case SIOCSHWTSTAMP: - if (!net_eq(dev_net(dev), &init_net)) + if (!net_eq(dev_net(dev), dev_net(real_dev))) break; fallthrough; case SIOCGMIIPHY: -- GitLab From cee1af825d65b8122627fc2efbc36c1bd51ee103 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Mar 2023 20:57:41 +0000 Subject: [PATCH 0539/2078] tcp: annotate lockless accesses to sk->sk_err_soft This field can be read/written without lock synchronization. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_ipv4.c | 6 +++--- net/ipv4/tcp_timer.c | 6 +++--- net/ipv6/tcp_ipv6.c | 11 ++++++----- 4 files changed, 13 insertions(+), 12 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index cc072d2cfcd82..8b5b6ca6617d0 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3874,7 +3874,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) /* We passed data and got it acked, remove any soft error * log. Something worked... */ - sk->sk_err_soft = 0; + WRITE_ONCE(sk->sk_err_soft, 0); icsk->icsk_probes_out = 0; tp->rcv_tstamp = tcp_jiffies32; if (!prior_packets) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ea370afa70ed9..4f6894469b620 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -361,7 +361,7 @@ void tcp_v4_mtu_reduced(struct sock *sk) * for the case, if this connection will not able to recover. 
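 * A minimal sketch of what the READ_ONCE()/WRITE_ONCE() pairing buys here
 * (illustrative; the reader shown is the tcp_timer.c hunk converted later
 * in this patch): sk_err_soft is written from the ICMP/error paths while
 * timer and poll paths read it without the socket lock, so both sides are
 * annotated to prevent load/store tearing and to mark the access as an
 * intentional, lockless data race:
 *
 *	WRITE_ONCE(sk->sk_err_soft, EMSGSIZE);		writer (error path)
 *
 *	if (READ_ONCE(sk->sk_err_soft) && !alive)	lockless reader
 *		retries = 0;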
*/ if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst)) - sk->sk_err_soft = EMSGSIZE; + WRITE_ONCE(sk->sk_err_soft, EMSGSIZE); mtu = dst_mtu(dst); @@ -602,7 +602,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) tcp_done(sk); } else { - sk->sk_err_soft = err; + WRITE_ONCE(sk->sk_err_soft, err); } goto out; } @@ -628,7 +628,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) sk->sk_err = err; sk_error_report(sk); } else { /* Only an error on timeout */ - sk->sk_err_soft = err; + WRITE_ONCE(sk->sk_err_soft, err); } out: diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index cb79127f45c34..8823e2182713a 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -67,7 +67,7 @@ u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when) static void tcp_write_err(struct sock *sk) { - sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT; + sk->sk_err = READ_ONCE(sk->sk_err_soft) ? : ETIMEDOUT; sk_error_report(sk); tcp_write_queue_purge(sk); @@ -110,7 +110,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) shift++; /* If some dubious ICMP arrived, penalize even more. */ - if (sk->sk_err_soft) + if (READ_ONCE(sk->sk_err_soft)) shift++; if (tcp_check_oom(sk, shift)) { @@ -146,7 +146,7 @@ static int tcp_orphan_retries(struct sock *sk, bool alive) int retries = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_orphan_retries); /* May be zero. */ /* We know from an ICMP that something is wrong. */ - if (sk->sk_err_soft && !alive) + if (READ_ONCE(sk->sk_err_soft) && !alive) retries = 0; /* However, if socket sent something recently, select some safe diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 1bf93b61aa06f..dc963eebc668f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -497,8 +497,9 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ tcp_done(sk); - } else - sk->sk_err_soft = err; + } else { + WRITE_ONCE(sk->sk_err_soft, err); + } goto out; case TCP_LISTEN: break; @@ -514,9 +515,9 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!sock_owned_by_user(sk) && np->recverr) { sk->sk_err = err; sk_error_report(sk); - } else - sk->sk_err_soft = err; - + } else { + WRITE_ONCE(sk->sk_err_soft, err); + } out: bh_unlock_sock(sk); sock_put(sk); -- GitLab From 9a25f0cb0d7ee689f54f38890e66bc78520b0c62 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Mar 2023 20:57:42 +0000 Subject: [PATCH 0540/2078] dccp: annotate lockless accesses to sk->sk_err_soft This field can be read/written without lock synchronization. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 12 +++++++----- net/dccp/ipv6.c | 11 ++++++----- net/dccp/timer.c | 2 +- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index b780827f5e0a5..3ab68415d121c 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -177,7 +177,7 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, * for the case, if this connection will not able to recover. 
*/ if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst)) - sk->sk_err_soft = EMSGSIZE; + WRITE_ONCE(sk->sk_err_soft, EMSGSIZE); mtu = dst_mtu(dst); @@ -339,8 +339,9 @@ static int dccp_v4_err(struct sk_buff *skb, u32 info) sk_error_report(sk); dccp_done(sk); - } else - sk->sk_err_soft = err; + } else { + WRITE_ONCE(sk->sk_err_soft, err); + } goto out; } @@ -364,8 +365,9 @@ static int dccp_v4_err(struct sk_buff *skb, u32 info) if (!sock_owned_by_user(sk) && inet->recverr) { sk->sk_err = err; sk_error_report(sk); - } else /* Only an error on timeout */ - sk->sk_err_soft = err; + } else { /* Only an error on timeout */ + WRITE_ONCE(sk->sk_err_soft, err); + } out: bh_unlock_sock(sk); sock_put(sk); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index b9d7c3dd1cb39..47fb108342239 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -174,17 +174,18 @@ static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, */ sk_error_report(sk); dccp_done(sk); - } else - sk->sk_err_soft = err; + } else { + WRITE_ONCE(sk->sk_err_soft, err); + } goto out; } if (!sock_owned_by_user(sk) && np->recverr) { sk->sk_err = err; sk_error_report(sk); - } else - sk->sk_err_soft = err; - + } else { + WRITE_ONCE(sk->sk_err_soft, err); + } out: bh_unlock_sock(sk); sock_put(sk); diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 27a3b37acd2ef..b3255e87cc7e1 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -19,7 +19,7 @@ int sysctl_dccp_retries2 __read_mostly = TCP_RETR2; static void dccp_write_err(struct sock *sk) { - sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT; + sk->sk_err = READ_ONCE(sk->sk_err_soft) ? : ETIMEDOUT; sk_error_report(sk); dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED); -- GitLab From 2f2d9972affae1f3282eff97c794cf843aedf61e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Mar 2023 20:57:43 +0000 Subject: [PATCH 0541/2078] net: annotate lockless accesses to sk->sk_err_soft This field can be read/written without lock synchronization. tcp and dccp have been handled in different patches. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- fs/dlm/lowcomms.c | 7 ++++--- net/atm/signaling.c | 2 +- net/ipv4/af_inet.c | 2 +- net/ipv6/af_inet6.c | 2 +- net/ipv6/inet6_connection_sock.c | 2 +- net/sctp/input.c | 2 +- net/sctp/ipv6.c | 2 +- 7 files changed, 10 insertions(+), 9 deletions(-) diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index a9b14f81d655c..bd786b3be5ecd 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -601,7 +601,7 @@ static void lowcomms_error_report(struct sock *sk) "sk_err=%d/%d\n", dlm_our_nodeid(), con->nodeid, &inet->inet_daddr, ntohs(inet->inet_dport), sk->sk_err, - sk->sk_err_soft); + READ_ONCE(sk->sk_err_soft)); break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: @@ -610,14 +610,15 @@ static void lowcomms_error_report(struct sock *sk) "dport %d, sk_err=%d/%d\n", dlm_our_nodeid(), con->nodeid, &sk->sk_v6_daddr, ntohs(inet->inet_dport), sk->sk_err, - sk->sk_err_soft); + READ_ONCE(sk->sk_err_soft)); break; #endif default: printk_ratelimited(KERN_ERR "dlm: node %d: socket error " "invalid socket family %d set, " "sk_err=%d/%d\n", dlm_our_nodeid(), - sk->sk_family, sk->sk_err, sk->sk_err_soft); + sk->sk_family, sk->sk_err, + READ_ONCE(sk->sk_err_soft)); break; } diff --git a/net/atm/signaling.c b/net/atm/signaling.c index 5de06ab8ed752..e70ae2c113f95 100644 --- a/net/atm/signaling.c +++ b/net/atm/signaling.c @@ -125,7 +125,7 @@ as_indicate_complete: break; case as_addparty: case as_dropparty: - sk->sk_err_soft = -msg->reply; + WRITE_ONCE(sk->sk_err_soft, -msg->reply); /* < 0 failure, otherwise ep_ref */ clear_bit(ATM_VF_WAITING, &vcc->flags); break; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8db6747f892f8..940062e08f574 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1322,7 +1322,7 @@ int inet_sk_rebuild_header(struct sock *sk) sk->sk_state != TCP_SYN_SENT || (sk->sk_userlocks & SOCK_BINDADDR_LOCK) || (err = inet_sk_reselect_saddr(sk)) != 0) - sk->sk_err_soft = -err; + WRITE_ONCE(sk->sk_err_soft, -err); } return err; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 38689bedfce75..e1b679a590c99 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -845,7 +845,7 @@ int inet6_sk_rebuild_header(struct sock *sk) dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); if (IS_ERR(dst)) { sk->sk_route_caps = 0; - sk->sk_err_soft = -PTR_ERR(dst); + WRITE_ONCE(sk->sk_err_soft, -PTR_ERR(dst)); return PTR_ERR(dst); } diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 5a9f4d722f35d..0c50dcd35fe8c 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -120,7 +120,7 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused dst = inet6_csk_route_socket(sk, &fl6); if (IS_ERR(dst)) { - sk->sk_err_soft = -PTR_ERR(dst); + WRITE_ONCE(sk->sk_err_soft, -PTR_ERR(dst)); sk->sk_route_caps = 0; kfree_skb(skb); return PTR_ERR(dst); diff --git a/net/sctp/input.c b/net/sctp/input.c index bf70371301ff4..127bf28a60330 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -585,7 +585,7 @@ static void sctp_v4_err_handle(struct sctp_transport *t, struct sk_buff *skb, sk->sk_err = err; sk_error_report(sk); } else { /* Only an error on timeout */ - sk->sk_err_soft = err; + WRITE_ONCE(sk->sk_err_soft, err); } } diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 62b436a2c8fef..43f2731bf590e 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -155,7 +155,7 @@ static void sctp_v6_err_handle(struct sctp_transport *t, struct sk_buff *skb, sk->sk_err = err; sk_error_report(sk); } 
else { - sk->sk_err_soft = err; + WRITE_ONCE(sk->sk_err_soft, err); } } -- GitLab From e13ec3da05d130f0d10da8e1fbe1be26dcdb0e27 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Mar 2023 20:57:44 +0000 Subject: [PATCH 0542/2078] tcp: annotate lockless access to sk->sk_err tcp_poll() reads sk->sk_err without socket lock held/owned. We should used READ_ONCE() here, and update writers to use WRITE_ONCE(). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 11 ++++++----- net/ipv4/tcp_input.c | 6 +++--- net/ipv4/tcp_ipv4.c | 4 ++-- net/ipv4/tcp_output.c | 2 +- net/ipv4/tcp_timer.c | 2 +- net/ipv6/tcp_ipv6.c | 4 ++-- 6 files changed, 15 insertions(+), 14 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 288693981b006..01569de651b65 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -589,7 +589,8 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) } /* This barrier is coupled with smp_wmb() in tcp_reset() */ smp_rmb(); - if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) + if (READ_ONCE(sk->sk_err) || + !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= EPOLLERR; return mask; @@ -3094,7 +3095,7 @@ int tcp_disconnect(struct sock *sk, int flags) if (old_state == TCP_LISTEN) { inet_csk_listen_stop(sk); } else if (unlikely(tp->repair)) { - sk->sk_err = ECONNABORTED; + WRITE_ONCE(sk->sk_err, ECONNABORTED); } else if (tcp_need_reset(old_state) || (tp->snd_nxt != tp->write_seq && (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) { @@ -3102,9 +3103,9 @@ int tcp_disconnect(struct sock *sk, int flags) * states */ tcp_send_active_reset(sk, gfp_any()); - sk->sk_err = ECONNRESET; + WRITE_ONCE(sk->sk_err, ECONNRESET); } else if (old_state == TCP_SYN_SENT) - sk->sk_err = ECONNRESET; + WRITE_ONCE(sk->sk_err, ECONNRESET); tcp_clear_xmit_timers(sk); __skb_queue_purge(&sk->sk_receive_queue); @@ -4692,7 +4693,7 @@ int tcp_abort(struct sock *sk, int err) bh_lock_sock(sk); if (!sock_flag(sk, SOCK_DEAD)) { - sk->sk_err = err; + WRITE_ONCE(sk->sk_err, err); /* This barrier is coupled with smp_rmb() in tcp_poll() */ smp_wmb(); sk_error_report(sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8b5b6ca6617d0..754ddbe0577f1 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4322,15 +4322,15 @@ void tcp_reset(struct sock *sk, struct sk_buff *skb) /* We want the right error as BSD sees it (and indeed as we do). 
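 * For context, the lockless reader these stores pair with (a sketch taken
 * from the tcp_poll() hunk earlier in this patch): the WRITE_ONCE() below
 * is followed by smp_wmb(), and the poller issues the matching smp_rmb()
 * before loading sk_err, so a woken poller is guaranteed to observe the
 * error that triggered the wakeup:
 *
 *	smp_rmb();	paired with smp_wmb() in tcp_reset()/tcp_abort()
 *	if (READ_ONCE(sk->sk_err) ||
 *	    !skb_queue_empty_lockless(&sk->sk_error_queue))
 *		mask |= EPOLLERR;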
*/ switch (sk->sk_state) { case TCP_SYN_SENT: - sk->sk_err = ECONNREFUSED; + WRITE_ONCE(sk->sk_err, ECONNREFUSED); break; case TCP_CLOSE_WAIT: - sk->sk_err = EPIPE; + WRITE_ONCE(sk->sk_err, EPIPE); break; case TCP_CLOSE: return; default: - sk->sk_err = ECONNRESET; + WRITE_ONCE(sk->sk_err, ECONNRESET); } /* This barrier is coupled with smp_rmb() in tcp_poll() */ smp_wmb(); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4f6894469b620..89daa6b953ff4 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -596,7 +596,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) ip_icmp_error(sk, skb, err, th->dest, info, (u8 *)th); if (!sock_owned_by_user(sk)) { - sk->sk_err = err; + WRITE_ONCE(sk->sk_err, err); sk_error_report(sk); @@ -625,7 +625,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) inet = inet_sk(sk); if (!sock_owned_by_user(sk) && inet->recverr) { - sk->sk_err = err; + WRITE_ONCE(sk->sk_err, err); sk_error_report(sk); } else { /* Only an error on timeout */ WRITE_ONCE(sk->sk_err_soft, err); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 71d01cf3c13eb..f7e00d90a7304 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3699,7 +3699,7 @@ static void tcp_connect_init(struct sock *sk) tp->rx_opt.rcv_wscale = rcv_wscale; tp->rcv_ssthresh = tp->rcv_wnd; - sk->sk_err = 0; + WRITE_ONCE(sk->sk_err, 0); sock_reset_flag(sk, SOCK_DONE); tp->snd_wnd = 0; tcp_init_wl(tp, 0); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 8823e2182713a..b839c2f91292f 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -67,7 +67,7 @@ u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when) static void tcp_write_err(struct sock *sk) { - sk->sk_err = READ_ONCE(sk->sk_err_soft) ? : ETIMEDOUT; + WRITE_ONCE(sk->sk_err, READ_ONCE(sk->sk_err_soft) ? : ETIMEDOUT); sk_error_report(sk); tcp_write_queue_purge(sk); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index dc963eebc668f..35cf523c9efd4 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -493,7 +493,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th); if (!sock_owned_by_user(sk)) { - sk->sk_err = err; + WRITE_ONCE(sk->sk_err, err); sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ tcp_done(sk); @@ -513,7 +513,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } if (!sock_owned_by_user(sk) && np->recverr) { - sk->sk_err = err; + WRITE_ONCE(sk->sk_err, err); sk_error_report(sk); } else { WRITE_ONCE(sk->sk_err_soft, err); -- GitLab From 9ae8e5ad99b8ebcd3d3dd46075f3825e6f08f063 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Mar 2023 20:57:45 +0000 Subject: [PATCH 0543/2078] mptcp: annotate lockless accesses to sk->sk_err mptcp_poll() reads sk->sk_err without socket lock held/owned. Add READ_ONCE() and WRITE_ONCE() to avoid load/store tearing. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/mptcp/pm_netlink.c | 2 +- net/mptcp/protocol.c | 8 ++++---- net/mptcp/subflow.c | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 56628b52d1001..cbaa1b49f7fe9 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -2019,7 +2019,7 @@ static int mptcp_event_put_token_and_ssk(struct sk_buff *skb, nla_put_s32(skb, MPTCP_ATTR_IF_IDX, ssk->sk_bound_dev_if)) return -EMSGSIZE; - sk_err = ssk->sk_err; + sk_err = READ_ONCE(ssk->sk_err); if (sk_err && sk->sk_state == TCP_ESTABLISHED && nla_put_u8(skb, MPTCP_ATTR_ERROR, sk_err)) return -EMSGSIZE; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 3ad9c46202fc6..3005a5adf715e 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2463,15 +2463,15 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk) /* Mirror the tcp_reset() error propagation */ switch (sk->sk_state) { case TCP_SYN_SENT: - sk->sk_err = ECONNREFUSED; + WRITE_ONCE(sk->sk_err, ECONNREFUSED); break; case TCP_CLOSE_WAIT: - sk->sk_err = EPIPE; + WRITE_ONCE(sk->sk_err, EPIPE); break; case TCP_CLOSE: return; default: - sk->sk_err = ECONNRESET; + WRITE_ONCE(sk->sk_err, ECONNRESET); } inet_sk_state_store(sk, TCP_CLOSE); @@ -3791,7 +3791,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock, /* This barrier is coupled with smp_wmb() in __mptcp_error_report() */ smp_rmb(); - if (sk->sk_err) + if (READ_ONCE(sk->sk_err)) mask |= EPOLLERR; return mask; diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 4ae1a7304cf0d..01874059a1686 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1335,7 +1335,7 @@ fallback: subflow->reset_reason = MPTCP_RST_EMPTCP; reset: - ssk->sk_err = EBADMSG; + WRITE_ONCE(ssk->sk_err, EBADMSG); tcp_set_state(ssk, TCP_CLOSE); while ((skb = skb_peek(&ssk->sk_receive_queue))) sk_eat_skb(ssk, skb); @@ -1419,7 +1419,7 @@ void __mptcp_error_report(struct sock *sk) ssk_state = inet_sk_state_load(ssk); if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) inet_sk_state_store(sk, ssk_state); - sk->sk_err = -err; + WRITE_ONCE(sk->sk_err, -err); /* This barrier is coupled with smp_rmb() in mptcp_poll() */ smp_wmb(); -- GitLab From cc04410af7de348234ac36a5f50c4ce416efdb4b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Mar 2023 20:57:46 +0000 Subject: [PATCH 0544/2078] af_unix: annotate lockless accesses to sk->sk_err unix_poll() and unix_dgram_poll() read sk->sk_err without any lock held. Add relevant READ_ONCE()/WRITE_ONCE() annotations. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/unix/af_unix.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 0b0f18ecce447..fb31e8a4409ed 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -557,7 +557,7 @@ static void unix_dgram_disconnected(struct sock *sk, struct sock *other) * when peer was not connected to us. 
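 * The ECONNRESET store below is annotated because unix_poll() and
 * unix_dgram_poll(), converted later in this patch, read sk_err with no
 * lock held; a minimal sketch of that reader:
 *
 *	if (READ_ONCE(sk->sk_err) ||
 *	    !skb_queue_empty_lockless(&sk->sk_error_queue))
 *		mask |= EPOLLERR;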
*/ if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) { - other->sk_err = ECONNRESET; + WRITE_ONCE(other->sk_err, ECONNRESET); sk_error_report(other); } } @@ -630,7 +630,7 @@ static void unix_release_sock(struct sock *sk, int embrion) /* No more writes */ skpair->sk_shutdown = SHUTDOWN_MASK; if (!skb_queue_empty(&sk->sk_receive_queue) || embrion) - skpair->sk_err = ECONNRESET; + WRITE_ONCE(skpair->sk_err, ECONNRESET); unix_state_unlock(skpair); skpair->sk_state_change(skpair); sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP); @@ -3165,7 +3165,7 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa mask = 0; /* exceptional events? */ - if (sk->sk_err) + if (READ_ONCE(sk->sk_err)) mask |= EPOLLERR; if (sk->sk_shutdown == SHUTDOWN_MASK) mask |= EPOLLHUP; @@ -3208,7 +3208,8 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, mask = 0; /* exceptional events? */ - if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) + if (READ_ONCE(sk->sk_err) || + !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= EPOLLERR | (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); -- GitLab From 2e80aeae9f807ac7e967dea7633abea5829c6531 Mon Sep 17 00:00:00 2001 From: Praveen Kaligineedi Date: Wed, 15 Mar 2023 16:33:08 -0700 Subject: [PATCH 0545/2078] gve: XDP support GQI-QPL: helper function changes This patch adds/modifies helper functions needed to add XDP support. Signed-off-by: Praveen Kaligineedi Reviewed-by: Jeroen de Borst Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve.h | 5 +++ drivers/net/ethernet/google/gve/gve_ethtool.c | 26 +++++++---- drivers/net/ethernet/google/gve/gve_main.c | 27 +++++++----- drivers/net/ethernet/google/gve/gve_rx.c | 2 +- drivers/net/ethernet/google/gve/gve_rx_dqo.c | 2 +- drivers/net/ethernet/google/gve/gve_tx.c | 43 +++++++++++-------- drivers/net/ethernet/google/gve/gve_utils.c | 6 +-- drivers/net/ethernet/google/gve/gve_utils.h | 3 +- 8 files changed, 70 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 64eb0442c82fd..f52f23198278f 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -855,6 +855,11 @@ static inline bool gve_is_gqi(struct gve_priv *priv) priv->queue_format == GVE_GQI_QPL_FORMAT; } +static inline u32 gve_num_tx_queues(struct gve_priv *priv) +{ + return priv->tx_cfg.num_queues; +} + /* buffers */ int gve_alloc_page(struct gve_priv *priv, struct device *dev, struct page **page, dma_addr_t *dma, diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index ce574d097e280..5b6e31812fae2 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -81,8 +81,10 @@ static void gve_get_strings(struct net_device *netdev, u32 stringset, u8 *data) { struct gve_priv *priv = netdev_priv(netdev); char *s = (char *)data; + int num_tx_queues; int i, j; + num_tx_queues = gve_num_tx_queues(priv); switch (stringset) { case ETH_SS_STATS: memcpy(s, *gve_gstrings_main_stats, @@ -97,7 +99,7 @@ static void gve_get_strings(struct net_device *netdev, u32 stringset, u8 *data) } } - for (i = 0; i < priv->tx_cfg.num_queues; i++) { + for (i = 0; i < num_tx_queues; i++) { for (j = 0; j < NUM_GVE_TX_CNTS; j++) { snprintf(s, ETH_GSTRING_LEN, gve_gstrings_tx_stats[j], i); @@ -124,12 +126,14 @@ static void gve_get_strings(struct net_device *netdev, u32 stringset, u8 *data) 
static int gve_get_sset_count(struct net_device *netdev, int sset) { struct gve_priv *priv = netdev_priv(netdev); + int num_tx_queues; + num_tx_queues = gve_num_tx_queues(priv); switch (sset) { case ETH_SS_STATS: return GVE_MAIN_STATS_LEN + GVE_ADMINQ_STATS_LEN + (priv->rx_cfg.num_queues * NUM_GVE_RX_CNTS) + - (priv->tx_cfg.num_queues * NUM_GVE_TX_CNTS); + (num_tx_queues * NUM_GVE_TX_CNTS); case ETH_SS_PRIV_FLAGS: return GVE_PRIV_FLAGS_STR_LEN; default: @@ -153,18 +157,20 @@ gve_get_ethtool_stats(struct net_device *netdev, struct gve_priv *priv; bool skip_nic_stats; unsigned int start; + int num_tx_queues; int ring; int i, j; ASSERT_RTNL(); priv = netdev_priv(netdev); + num_tx_queues = gve_num_tx_queues(priv); report_stats = priv->stats_report->stats; rx_qid_to_stats_idx = kmalloc_array(priv->rx_cfg.num_queues, sizeof(int), GFP_KERNEL); if (!rx_qid_to_stats_idx) return; - tx_qid_to_stats_idx = kmalloc_array(priv->tx_cfg.num_queues, + tx_qid_to_stats_idx = kmalloc_array(num_tx_queues, sizeof(int), GFP_KERNEL); if (!tx_qid_to_stats_idx) { kfree(rx_qid_to_stats_idx); @@ -195,7 +201,7 @@ gve_get_ethtool_stats(struct net_device *netdev, } } for (tx_pkts = 0, tx_bytes = 0, tx_dropped = 0, ring = 0; - ring < priv->tx_cfg.num_queues; ring++) { + ring < num_tx_queues; ring++) { if (priv->tx) { do { start = @@ -232,7 +238,7 @@ gve_get_ethtool_stats(struct net_device *netdev, i = GVE_MAIN_STATS_LEN; /* For rx cross-reporting stats, start from nic rx stats in report */ - base_stats_idx = GVE_TX_STATS_REPORT_NUM * priv->tx_cfg.num_queues + + base_stats_idx = GVE_TX_STATS_REPORT_NUM * num_tx_queues + GVE_RX_STATS_REPORT_NUM * priv->rx_cfg.num_queues; max_stats_idx = NIC_RX_STATS_REPORT_NUM * priv->rx_cfg.num_queues + base_stats_idx; @@ -298,7 +304,7 @@ gve_get_ethtool_stats(struct net_device *netdev, /* For tx cross-reporting stats, start from nic tx stats in report */ base_stats_idx = max_stats_idx; - max_stats_idx = NIC_TX_STATS_REPORT_NUM * priv->tx_cfg.num_queues + + max_stats_idx = NIC_TX_STATS_REPORT_NUM * num_tx_queues + max_stats_idx; /* Preprocess the stats report for tx, map queue id to start index */ skip_nic_stats = false; @@ -316,7 +322,7 @@ gve_get_ethtool_stats(struct net_device *netdev, } /* walk TX rings */ if (priv->tx) { - for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) { + for (ring = 0; ring < num_tx_queues; ring++) { struct gve_tx_ring *tx = &priv->tx[ring]; if (gve_is_gqi(priv)) { @@ -355,7 +361,7 @@ gve_get_ethtool_stats(struct net_device *netdev, } } } else { - i += priv->tx_cfg.num_queues * NUM_GVE_TX_CNTS; + i += num_tx_queues * NUM_GVE_TX_CNTS; } kfree(rx_qid_to_stats_idx); @@ -502,7 +508,9 @@ static int gve_set_priv_flags(struct net_device *netdev, u32 flags) { struct gve_priv *priv = netdev_priv(netdev); u64 ori_flags, new_flags; + int num_tx_queues; + num_tx_queues = gve_num_tx_queues(priv); ori_flags = READ_ONCE(priv->ethtool_flags); new_flags = ori_flags; @@ -522,7 +530,7 @@ static int gve_set_priv_flags(struct net_device *netdev, u32 flags) /* delete report stats timer. 
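 * The reason this patch routes every per-queue loop through the new
 * gve_num_tx_queues() helper (sketch; the actual definition change lands
 * in the XDP patch later in this series): once dedicated XDP TX queues
 * exist, the driver-visible TX queue count is no longer tx_cfg.num_queues
 * alone:
 *
 *	static inline u32 gve_num_tx_queues(struct gve_priv *priv)
 *	{
 *		return priv->tx_cfg.num_queues + priv->num_xdp_queues;
 *	}
 *
 * so any open-coded use of tx_cfg.num_queues would silently skip the XDP
 * queues.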
*/ if (!(flags & BIT(0)) && (ori_flags & BIT(0))) { int tx_stats_num = GVE_TX_STATS_REPORT_NUM * - priv->tx_cfg.num_queues; + num_tx_queues; int rx_stats_num = GVE_RX_STATS_REPORT_NUM * priv->rx_cfg.num_queues; diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 07111c241e0eb..3cfdeeb74f60e 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -90,8 +90,10 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) struct gve_priv *priv = netdev_priv(dev); unsigned int start; u64 packets, bytes; + int num_tx_queues; int ring; + num_tx_queues = gve_num_tx_queues(priv); if (priv->rx) { for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) { do { @@ -106,7 +108,7 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) } } if (priv->tx) { - for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) { + for (ring = 0; ring < num_tx_queues; ring++) { do { start = u64_stats_fetch_begin(&priv->tx[ring].statss); @@ -180,7 +182,7 @@ static int gve_alloc_stats_report(struct gve_priv *priv) int tx_stats_num, rx_stats_num; tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) * - priv->tx_cfg.num_queues; + gve_num_tx_queues(priv); rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) * priv->rx_cfg.num_queues; priv->stats_report_len = struct_size(priv->stats_report, stats, @@ -622,20 +624,21 @@ static int gve_unregister_qpls(struct gve_priv *priv) static int gve_create_rings(struct gve_priv *priv) { + int num_tx_queues = gve_num_tx_queues(priv); int err; int i; - err = gve_adminq_create_tx_queues(priv, priv->tx_cfg.num_queues); + err = gve_adminq_create_tx_queues(priv, num_tx_queues); if (err) { netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n", - priv->tx_cfg.num_queues); + num_tx_queues); /* This failure will trigger a reset - no need to clean * up */ return err; } netif_dbg(priv, drv, priv->dev, "created %d tx queues\n", - priv->tx_cfg.num_queues); + num_tx_queues); err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues); if (err) { @@ -675,7 +678,7 @@ static void add_napi_init_sync_stats(struct gve_priv *priv, int i; /* Add tx napi & init sync stats*/ - for (i = 0; i < priv->tx_cfg.num_queues; i++) { + for (i = 0; i < gve_num_tx_queues(priv); i++) { int ntfy_idx = gve_tx_idx_to_ntfy(priv, i); u64_stats_init(&priv->tx[i].statss); @@ -753,9 +756,10 @@ free_tx: static int gve_destroy_rings(struct gve_priv *priv) { + int num_tx_queues = gve_num_tx_queues(priv); int err; - err = gve_adminq_destroy_tx_queues(priv, priv->tx_cfg.num_queues); + err = gve_adminq_destroy_tx_queues(priv, num_tx_queues); if (err) { netif_err(priv, drv, priv->dev, "failed to destroy tx queues\n"); @@ -784,11 +788,12 @@ static void gve_rx_free_rings(struct gve_priv *priv) static void gve_free_rings(struct gve_priv *priv) { + int num_tx_queues = gve_num_tx_queues(priv); int ntfy_idx; int i; if (priv->tx) { - for (i = 0; i < priv->tx_cfg.num_queues; i++) { + for (i = 0; i < num_tx_queues; i++) { ntfy_idx = gve_tx_idx_to_ntfy(priv, i); gve_remove_napi(priv, ntfy_idx); } @@ -1118,7 +1123,7 @@ static void gve_turndown(struct gve_priv *priv) return; /* Disable napi to prevent more work from coming in */ - for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) { + for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; @@ -1146,7 
+1151,7 @@ static void gve_turnup(struct gve_priv *priv) netif_tx_start_all_queues(priv->dev); /* Enable napi and unmask interrupts for all queues */ - for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) { + for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; @@ -1306,7 +1311,7 @@ void gve_handle_report_stats(struct gve_priv *priv) be64_add_cpu(&priv->stats_report->written_count, 1); /* tx stats */ if (priv->tx) { - for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) { + for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { u32 last_completion = 0; u32 tx_frames = 0; diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index 1f55137722b04..db1c74b1d7d35 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -556,7 +556,7 @@ static struct sk_buff *gve_rx_skb(struct gve_priv *priv, struct gve_rx_ring *rx, if (len <= priv->rx_copybreak && is_only_frag) { /* Just copy small packets */ - skb = gve_rx_copy(netdev, napi, page_info, len, GVE_RX_PAD); + skb = gve_rx_copy(netdev, napi, page_info, len); if (skb) { u64_stats_update_begin(&rx->statss); rx->rx_copied_pkt++; diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index 630f42a3037b0..e57b73eb70f62 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -568,7 +568,7 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, if (eop && buf_len <= priv->rx_copybreak) { rx->ctx.skb_head = gve_rx_copy(priv->dev, napi, - &buf_state->page_info, buf_len, 0); + &buf_state->page_info, buf_len); if (unlikely(!rx->ctx.skb_head)) goto error; rx->ctx.skb_tail = rx->ctx.skb_head; diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c index 4888bf05fbedb..0fb052ce9e0b0 100644 --- a/drivers/net/ethernet/google/gve/gve_tx.c +++ b/drivers/net/ethernet/google/gve/gve_tx.c @@ -374,18 +374,18 @@ static int gve_maybe_stop_tx(struct gve_priv *priv, struct gve_tx_ring *tx, } static void gve_tx_fill_pkt_desc(union gve_tx_desc *pkt_desc, - struct sk_buff *skb, bool is_gso, + u16 csum_offset, u8 ip_summed, bool is_gso, int l4_hdr_offset, u32 desc_cnt, - u16 hlen, u64 addr) + u16 hlen, u64 addr, u16 pkt_len) { /* l4_hdr_offset and csum_offset are in units of 16-bit words */ if (is_gso) { pkt_desc->pkt.type_flags = GVE_TXD_TSO | GVE_TXF_L4CSUM; - pkt_desc->pkt.l4_csum_offset = skb->csum_offset >> 1; + pkt_desc->pkt.l4_csum_offset = csum_offset >> 1; pkt_desc->pkt.l4_hdr_offset = l4_hdr_offset >> 1; - } else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { + } else if (likely(ip_summed == CHECKSUM_PARTIAL)) { pkt_desc->pkt.type_flags = GVE_TXD_STD | GVE_TXF_L4CSUM; - pkt_desc->pkt.l4_csum_offset = skb->csum_offset >> 1; + pkt_desc->pkt.l4_csum_offset = csum_offset >> 1; pkt_desc->pkt.l4_hdr_offset = l4_hdr_offset >> 1; } else { pkt_desc->pkt.type_flags = GVE_TXD_STD; @@ -393,7 +393,7 @@ static void gve_tx_fill_pkt_desc(union gve_tx_desc *pkt_desc, pkt_desc->pkt.l4_hdr_offset = 0; } pkt_desc->pkt.desc_cnt = desc_cnt; - pkt_desc->pkt.len = cpu_to_be16(skb->len); + pkt_desc->pkt.len = cpu_to_be16(pkt_len); pkt_desc->pkt.seg_len = cpu_to_be16(hlen); pkt_desc->pkt.seg_addr = cpu_to_be64(addr); } @@ -412,15 +412,16 @@ static void gve_tx_fill_mtd_desc(union gve_tx_desc *mtd_desc, } static void 
gve_tx_fill_seg_desc(union gve_tx_desc *seg_desc, - struct sk_buff *skb, bool is_gso, + u16 l3_offset, u16 gso_size, + bool is_gso_v6, bool is_gso, u16 len, u64 addr) { seg_desc->seg.type_flags = GVE_TXD_SEG; if (is_gso) { - if (skb_is_gso_v6(skb)) + if (is_gso_v6) seg_desc->seg.type_flags |= GVE_TXSF_IPV6; - seg_desc->seg.l3_offset = skb_network_offset(skb) >> 1; - seg_desc->seg.mss = cpu_to_be16(skb_shinfo(skb)->gso_size); + seg_desc->seg.l3_offset = l3_offset >> 1; + seg_desc->seg.mss = cpu_to_be16(gso_size); } seg_desc->seg.seg_len = cpu_to_be16(len); seg_desc->seg.seg_addr = cpu_to_be64(addr); @@ -473,9 +474,10 @@ static int gve_tx_add_skb_copy(struct gve_priv *priv, struct gve_tx_ring *tx, st payload_nfrags = gve_tx_alloc_fifo(&tx->tx_fifo, skb->len - hlen, &info->iov[payload_iov]); - gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset, + gve_tx_fill_pkt_desc(pkt_desc, skb->csum_offset, skb->ip_summed, + is_gso, l4_hdr_offset, 1 + mtd_desc_nr + payload_nfrags, hlen, - info->iov[hdr_nfrags - 1].iov_offset); + info->iov[hdr_nfrags - 1].iov_offset, skb->len); skb_copy_bits(skb, 0, tx->tx_fifo.base + info->iov[hdr_nfrags - 1].iov_offset, @@ -494,7 +496,9 @@ static int gve_tx_add_skb_copy(struct gve_priv *priv, struct gve_tx_ring *tx, st next_idx = (tx->req + 1 + mtd_desc_nr + i - payload_iov) & tx->mask; seg_desc = &tx->desc[next_idx]; - gve_tx_fill_seg_desc(seg_desc, skb, is_gso, + gve_tx_fill_seg_desc(seg_desc, skb_network_offset(skb), + skb_shinfo(skb)->gso_size, + skb_is_gso_v6(skb), is_gso, info->iov[i].iov_len, info->iov[i].iov_offset); @@ -552,8 +556,9 @@ static int gve_tx_add_skb_no_copy(struct gve_priv *priv, struct gve_tx_ring *tx, if (mtd_desc_nr) num_descriptors++; - gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset, - num_descriptors, hlen, addr); + gve_tx_fill_pkt_desc(pkt_desc, skb->csum_offset, skb->ip_summed, + is_gso, l4_hdr_offset, + num_descriptors, hlen, addr, skb->len); if (mtd_desc_nr) { idx = (idx + 1) & tx->mask; @@ -569,7 +574,9 @@ static int gve_tx_add_skb_no_copy(struct gve_priv *priv, struct gve_tx_ring *tx, addr += hlen; idx = (idx + 1) & tx->mask; seg_desc = &tx->desc[idx]; - gve_tx_fill_seg_desc(seg_desc, skb, is_gso, len, addr); + gve_tx_fill_seg_desc(seg_desc, skb_network_offset(skb), + skb_shinfo(skb)->gso_size, + skb_is_gso_v6(skb), is_gso, len, addr); } for (i = 0; i < shinfo->nr_frags; i++) { @@ -587,7 +594,9 @@ static int gve_tx_add_skb_no_copy(struct gve_priv *priv, struct gve_tx_ring *tx, dma_unmap_len_set(&tx->info[idx], len, len); dma_unmap_addr_set(&tx->info[idx], dma, addr); - gve_tx_fill_seg_desc(seg_desc, skb, is_gso, len, addr); + gve_tx_fill_seg_desc(seg_desc, skb_network_offset(skb), + skb_shinfo(skb)->gso_size, + skb_is_gso_v6(skb), is_gso, len, addr); } return num_descriptors; diff --git a/drivers/net/ethernet/google/gve/gve_utils.c b/drivers/net/ethernet/google/gve/gve_utils.c index 6ba46adaaee3d..26e08d7532702 100644 --- a/drivers/net/ethernet/google/gve/gve_utils.c +++ b/drivers/net/ethernet/google/gve/gve_utils.c @@ -49,10 +49,10 @@ void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx) } struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi, - struct gve_rx_slot_page_info *page_info, u16 len, - u16 padding) + struct gve_rx_slot_page_info *page_info, u16 len) { - void *va = page_info->page_address + padding + page_info->page_offset; + void *va = page_info->page_address + page_info->page_offset + + page_info->pad; struct sk_buff *skb; skb = napi_alloc_skb(napi, len); diff --git 
a/drivers/net/ethernet/google/gve/gve_utils.h b/drivers/net/ethernet/google/gve/gve_utils.h index 79595940b3511..324fd98a61124 100644 --- a/drivers/net/ethernet/google/gve/gve_utils.h +++ b/drivers/net/ethernet/google/gve/gve_utils.h @@ -18,8 +18,7 @@ void gve_rx_remove_from_block(struct gve_priv *priv, int queue_idx); void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx); struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi, - struct gve_rx_slot_page_info *page_info, u16 len, - u16 pad); + struct gve_rx_slot_page_info *page_info, u16 len); /* Decrement pagecnt_bias. Set it back to INT_MAX if it reached zero. */ void gve_dec_pagecnt_bias(struct gve_rx_slot_page_info *page_info); -- GitLab From 7fc2bf78a430f975e53f2b1e87e99d8f83cafd11 Mon Sep 17 00:00:00 2001 From: Praveen Kaligineedi Date: Wed, 15 Mar 2023 16:33:09 -0700 Subject: [PATCH 0546/2078] gve: Changes to add new TX queues Changes to enable adding and removing TX queues without calling gve_close() and gve_open(). Made the following changes: 1) priv->tx, priv->rx and priv->qpls arrays are allocated based on max tx queues and max rx queues 2) Changed gve_adminq_create_tx_queues(), gve_adminq_destroy_tx_queues(), gve_tx_alloc_rings() and gve_tx_free_rings() functions to add/remove a subset of TX queues rather than all the TX queues. Signed-off-by: Praveen Kaligineedi Reviewed-by: Jeroen de Borst Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve.h | 45 +++++++---- drivers/net/ethernet/google/gve/gve_adminq.c | 8 +- drivers/net/ethernet/google/gve/gve_adminq.h | 4 +- drivers/net/ethernet/google/gve/gve_main.c | 83 ++++++++++++++------ drivers/net/ethernet/google/gve/gve_rx.c | 2 +- drivers/net/ethernet/google/gve/gve_tx.c | 12 +-- 6 files changed, 104 insertions(+), 50 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index f52f23198278f..f354a6448c257 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -798,16 +798,35 @@ static inline u32 gve_num_rx_qpls(struct gve_priv *priv) return priv->rx_cfg.num_queues; } +static inline u32 gve_tx_qpl_id(struct gve_priv *priv, int tx_qid) +{ + return tx_qid; +} + +static inline u32 gve_rx_qpl_id(struct gve_priv *priv, int rx_qid) +{ + return priv->tx_cfg.max_queues + rx_qid; +} + +static inline u32 gve_tx_start_qpl_id(struct gve_priv *priv) +{ + return gve_tx_qpl_id(priv, 0); +} + +static inline u32 gve_rx_start_qpl_id(struct gve_priv *priv) +{ + return gve_rx_qpl_id(priv, 0); +} + /* Returns a pointer to the next available tx qpl in the list of qpls */ static inline -struct gve_queue_page_list *gve_assign_tx_qpl(struct gve_priv *priv) +struct gve_queue_page_list *gve_assign_tx_qpl(struct gve_priv *priv, int tx_qid) { - int id = find_first_zero_bit(priv->qpl_cfg.qpl_id_map, - priv->qpl_cfg.qpl_map_size); + int id = gve_tx_qpl_id(priv, tx_qid); - /* we are out of tx qpls */ - if (id >= gve_num_tx_qpls(priv)) + /* QPL already in use */ + if (test_bit(id, priv->qpl_cfg.qpl_id_map)) return NULL; set_bit(id, priv->qpl_cfg.qpl_id_map); @@ -817,14 +836,12 @@ struct gve_queue_page_list *gve_assign_tx_qpl(struct gve_priv *priv) /* Returns a pointer to the next available rx qpl in the list of qpls */ static inline -struct gve_queue_page_list *gve_assign_rx_qpl(struct gve_priv *priv) +struct gve_queue_page_list *gve_assign_rx_qpl(struct gve_priv *priv, int rx_qid) { - int id = find_next_zero_bit(priv->qpl_cfg.qpl_id_map, - priv->qpl_cfg.qpl_map_size, - 
gve_num_tx_qpls(priv)); + int id = gve_rx_qpl_id(priv, rx_qid); - /* we are out of rx qpls */ - if (id == gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv)) + /* QPL already in use */ + if (test_bit(id, priv->qpl_cfg.qpl_id_map)) return NULL; set_bit(id, priv->qpl_cfg.qpl_id_map); @@ -843,7 +860,7 @@ static inline void gve_unassign_qpl(struct gve_priv *priv, int id) static inline enum dma_data_direction gve_qpl_dma_dir(struct gve_priv *priv, int id) { - if (id < gve_num_tx_qpls(priv)) + if (id < gve_rx_start_qpl_id(priv)) return DMA_TO_DEVICE; else return DMA_FROM_DEVICE; @@ -869,8 +886,8 @@ void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, /* tx handling */ netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev); bool gve_tx_poll(struct gve_notify_block *block, int budget); -int gve_tx_alloc_rings(struct gve_priv *priv); -void gve_tx_free_rings_gqi(struct gve_priv *priv); +int gve_tx_alloc_rings(struct gve_priv *priv, int start_id, int num_rings); +void gve_tx_free_rings_gqi(struct gve_priv *priv, int start_id, int num_rings); u32 gve_tx_load_event_counter(struct gve_priv *priv, struct gve_tx_ring *tx); bool gve_tx_clean_pending(struct gve_priv *priv, struct gve_tx_ring *tx); diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index 60061288ad9d4..252974202a3fb 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -516,12 +516,12 @@ static int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index) return gve_adminq_issue_cmd(priv, &cmd); } -int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 num_queues) +int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_queues) { int err; int i; - for (i = 0; i < num_queues; i++) { + for (i = start_id; i < start_id + num_queues; i++) { err = gve_adminq_create_tx_queue(priv, i); if (err) return err; @@ -604,12 +604,12 @@ static int gve_adminq_destroy_tx_queue(struct gve_priv *priv, u32 queue_index) return 0; } -int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 num_queues) +int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_queues) { int err; int i; - for (i = 0; i < num_queues; i++) { + for (i = start_id; i < start_id + num_queues; i++) { err = gve_adminq_destroy_tx_queue(priv, i); if (err) return err; diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h index cf29662e6ad11..f894beb3deaf7 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.h +++ b/drivers/net/ethernet/google/gve/gve_adminq.h @@ -410,8 +410,8 @@ int gve_adminq_configure_device_resources(struct gve_priv *priv, dma_addr_t db_array_bus_addr, u32 num_ntfy_blks); int gve_adminq_deconfigure_device_resources(struct gve_priv *priv); -int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 num_queues); -int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 queue_id); +int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_queues); +int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_queues); int gve_adminq_create_rx_queues(struct gve_priv *priv, u32 num_queues); int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 queue_id); int gve_adminq_register_page_list(struct gve_priv *priv, diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 3cfdeeb74f60e..160ca77c2751f 100644 --- 
a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -584,11 +584,26 @@ static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx) static int gve_register_qpls(struct gve_priv *priv) { - int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv); + int start_id; int err; int i; - for (i = 0; i < num_qpls; i++) { + start_id = gve_tx_start_qpl_id(priv); + for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) { + err = gve_adminq_register_page_list(priv, &priv->qpls[i]); + if (err) { + netif_err(priv, drv, priv->dev, + "failed to register queue page list %d\n", + priv->qpls[i].id); + /* This failure will trigger a reset - no need to clean + * up + */ + return err; + } + } + + start_id = gve_rx_start_qpl_id(priv); + for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) { err = gve_adminq_register_page_list(priv, &priv->qpls[i]); if (err) { netif_err(priv, drv, priv->dev, @@ -605,11 +620,24 @@ static int gve_register_qpls(struct gve_priv *priv) static int gve_unregister_qpls(struct gve_priv *priv) { - int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv); + int start_id; int err; int i; - for (i = 0; i < num_qpls; i++) { + start_id = gve_tx_start_qpl_id(priv); + for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) { + err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id); + /* This failure will trigger a reset - no need to clean up */ + if (err) { + netif_err(priv, drv, priv->dev, + "Failed to unregister queue page list %d\n", + priv->qpls[i].id); + return err; + } + } + + start_id = gve_rx_start_qpl_id(priv); + for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) { err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id); /* This failure will trigger a reset - no need to clean up */ if (err) { @@ -628,7 +656,7 @@ static int gve_create_rings(struct gve_priv *priv) int err; int i; - err = gve_adminq_create_tx_queues(priv, num_tx_queues); + err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues); if (err) { netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n", num_tx_queues); @@ -695,10 +723,10 @@ static void add_napi_init_sync_stats(struct gve_priv *priv, } } -static void gve_tx_free_rings(struct gve_priv *priv) +static void gve_tx_free_rings(struct gve_priv *priv, int start_id, int num_rings) { if (gve_is_gqi(priv)) { - gve_tx_free_rings_gqi(priv); + gve_tx_free_rings_gqi(priv, start_id, num_rings); } else { gve_tx_free_rings_dqo(priv); } @@ -709,20 +737,20 @@ static int gve_alloc_rings(struct gve_priv *priv) int err; /* Setup tx rings */ - priv->tx = kvcalloc(priv->tx_cfg.num_queues, sizeof(*priv->tx), + priv->tx = kvcalloc(priv->tx_cfg.max_queues, sizeof(*priv->tx), GFP_KERNEL); if (!priv->tx) return -ENOMEM; if (gve_is_gqi(priv)) - err = gve_tx_alloc_rings(priv); + err = gve_tx_alloc_rings(priv, 0, gve_num_tx_queues(priv)); else err = gve_tx_alloc_rings_dqo(priv); if (err) goto free_tx; /* Setup rx rings */ - priv->rx = kvcalloc(priv->rx_cfg.num_queues, sizeof(*priv->rx), + priv->rx = kvcalloc(priv->rx_cfg.max_queues, sizeof(*priv->rx), GFP_KERNEL); if (!priv->rx) { err = -ENOMEM; @@ -747,7 +775,7 @@ free_rx: kvfree(priv->rx); priv->rx = NULL; free_tx_queue: - gve_tx_free_rings(priv); + gve_tx_free_rings(priv, 0, gve_num_tx_queues(priv)); free_tx: kvfree(priv->tx); priv->tx = NULL; @@ -759,7 +787,7 @@ static int gve_destroy_rings(struct gve_priv *priv) int num_tx_queues = gve_num_tx_queues(priv); int err; - err = gve_adminq_destroy_tx_queues(priv, num_tx_queues); + err = 
gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues); if (err) { netif_err(priv, drv, priv->dev, "failed to destroy tx queues\n"); @@ -797,7 +825,7 @@ static void gve_free_rings(struct gve_priv *priv) ntfy_idx = gve_tx_idx_to_ntfy(priv, i); gve_remove_napi(priv, ntfy_idx); } - gve_tx_free_rings(priv); + gve_tx_free_rings(priv, 0, num_tx_queues); kvfree(priv->tx); priv->tx = NULL; } @@ -894,40 +922,46 @@ static void gve_free_queue_page_list(struct gve_priv *priv, u32 id) qpl->page_buses[i], gve_qpl_dma_dir(priv, id)); kvfree(qpl->page_buses); + qpl->page_buses = NULL; free_pages: kvfree(qpl->pages); + qpl->pages = NULL; priv->num_registered_pages -= qpl->num_entries; } static int gve_alloc_qpls(struct gve_priv *priv) { - int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv); + int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues; + int start_id; int i, j; int err; - if (num_qpls == 0) + if (priv->queue_format != GVE_GQI_QPL_FORMAT) return 0; - priv->qpls = kvcalloc(num_qpls, sizeof(*priv->qpls), GFP_KERNEL); + priv->qpls = kvcalloc(max_queues, sizeof(*priv->qpls), GFP_KERNEL); if (!priv->qpls) return -ENOMEM; - for (i = 0; i < gve_num_tx_qpls(priv); i++) { + start_id = gve_tx_start_qpl_id(priv); + for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) { err = gve_alloc_queue_page_list(priv, i, priv->tx_pages_per_qpl); if (err) goto free_qpls; } - for (; i < num_qpls; i++) { + + start_id = gve_rx_start_qpl_id(priv); + for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) { err = gve_alloc_queue_page_list(priv, i, priv->rx_data_slot_cnt); if (err) goto free_qpls; } - priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(num_qpls) * + priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(max_queues) * sizeof(unsigned long) * BITS_PER_BYTE; - priv->qpl_cfg.qpl_id_map = kvcalloc(BITS_TO_LONGS(num_qpls), + priv->qpl_cfg.qpl_id_map = kvcalloc(BITS_TO_LONGS(max_queues), sizeof(unsigned long), GFP_KERNEL); if (!priv->qpl_cfg.qpl_id_map) { err = -ENOMEM; @@ -940,23 +974,26 @@ free_qpls: for (j = 0; j <= i; j++) gve_free_queue_page_list(priv, j); kvfree(priv->qpls); + priv->qpls = NULL; return err; } static void gve_free_qpls(struct gve_priv *priv) { - int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv); + int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues; int i; - if (num_qpls == 0) + if (!priv->qpls) return; kvfree(priv->qpl_cfg.qpl_id_map); + priv->qpl_cfg.qpl_id_map = NULL; - for (i = 0; i < num_qpls; i++) + for (i = 0; i < max_queues; i++) gve_free_queue_page_list(priv, i); kvfree(priv->qpls); + priv->qpls = NULL; } /* Use this to schedule a reset when the device is capable of continuing diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index db1c74b1d7d35..051a15e4f1afd 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -124,7 +124,7 @@ static int gve_prefill_rx_pages(struct gve_rx_ring *rx) return -ENOMEM; if (!rx->data.raw_addressing) { - rx->data.qpl = gve_assign_rx_qpl(priv); + rx->data.qpl = gve_assign_rx_qpl(priv, rx->q_num); if (!rx->data.qpl) { kvfree(rx->data.page_info); rx->data.page_info = NULL; diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c index 0fb052ce9e0b0..e24e73e74e339 100644 --- a/drivers/net/ethernet/google/gve/gve_tx.c +++ b/drivers/net/ethernet/google/gve/gve_tx.c @@ -195,7 +195,7 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx) tx->raw_addressing = priv->queue_format == 
GVE_GQI_RDA_FORMAT; tx->dev = &priv->pdev->dev; if (!tx->raw_addressing) { - tx->tx_fifo.qpl = gve_assign_tx_qpl(priv); + tx->tx_fifo.qpl = gve_assign_tx_qpl(priv, idx); if (!tx->tx_fifo.qpl) goto abort_with_desc; /* map Tx FIFO */ @@ -233,12 +233,12 @@ abort_with_info: return -ENOMEM; } -int gve_tx_alloc_rings(struct gve_priv *priv) +int gve_tx_alloc_rings(struct gve_priv *priv, int start_id, int num_rings) { int err = 0; int i; - for (i = 0; i < priv->tx_cfg.num_queues; i++) { + for (i = start_id; i < start_id + num_rings; i++) { err = gve_tx_alloc_ring(priv, i); if (err) { netif_err(priv, drv, priv->dev, @@ -251,17 +251,17 @@ int gve_tx_alloc_rings(struct gve_priv *priv) if (err) { int j; - for (j = 0; j < i; j++) + for (j = start_id; j < i; j++) gve_tx_free_ring(priv, j); } return err; } -void gve_tx_free_rings_gqi(struct gve_priv *priv) +void gve_tx_free_rings_gqi(struct gve_priv *priv, int start_id, int num_rings) { int i; - for (i = 0; i < priv->tx_cfg.num_queues; i++) + for (i = start_id; i < start_id + num_rings; i++) gve_tx_free_ring(priv, i); } -- GitLab From 75eaae158b1b7d8d5bde2bafc0bcf778423071d3 Mon Sep 17 00:00:00 2001 From: Praveen Kaligineedi Date: Wed, 15 Mar 2023 16:33:10 -0700 Subject: [PATCH 0547/2078] gve: Add XDP DROP and TX support for GQI-QPL format Add support for XDP PASS, DROP and TX actions. This patch contains the following changes: 1) Support installing/uninstalling XDP program 2) Add dedicated XDP TX queues 3) Add support for XDP DROP action 4) Add support for XDP TX action Signed-off-by: Praveen Kaligineedi Reviewed-by: Jeroen de Borst Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve.h | 44 +- drivers/net/ethernet/google/gve/gve_ethtool.c | 37 +- drivers/net/ethernet/google/gve/gve_main.c | 422 +++++++++++++++++- drivers/net/ethernet/google/gve/gve_rx.c | 74 ++- drivers/net/ethernet/google/gve/gve_tx.c | 149 ++++++- 5 files changed, 687 insertions(+), 39 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index f354a6448c257..8d5234d4ba67b 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -47,6 +47,10 @@ #define GVE_RX_BUFFER_SIZE_DQO 2048 +#define GVE_XDP_ACTIONS 5 + +#define GVE_TX_MAX_HEADER_SIZE 182 + /* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */ struct gve_rx_desc_queue { struct gve_rx_desc *desc_ring; /* the descriptor ring */ @@ -230,7 +234,9 @@ struct gve_rx_ring { u64 rx_frag_flip_cnt; /* free-running count of rx segments where page_flip was used */ u64 rx_frag_copy_cnt; /* free-running count of rx segments copied */ u64 rx_frag_alloc_cnt; /* free-running count of rx page allocations */ - + u64 xdp_tx_errors; + u64 xdp_redirect_errors; + u64 xdp_actions[GVE_XDP_ACTIONS]; u32 q_num; /* queue index */ u32 ntfy_id; /* notification block index */ struct gve_queue_resources *q_resources; /* head and tail pointer idx */ @@ -238,6 +244,9 @@ struct gve_rx_ring { struct u64_stats_sync statss; /* sync stats for 32bit archs */ struct gve_rx_ctx ctx; /* Info for packet currently being processed in this ring. 
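 * Background for the xdp_rxq member added just below (a sketch of the
 * generic kernel XDP API, not quoted from this patch): each RX ring
 * registers its xdp_rxq_info so that XDP buffers built from the ring
 * carry the receiving queue's identity, roughly:
 *
 *	err = xdp_rxq_info_reg(&rx->xdp_rxq, priv->dev, rx->q_num, napi_id);
 *	if (err)
 *		return err;
 *
 * where napi_id identifies the NAPI instance servicing the ring, with a
 * matching xdp_rxq_info_unreg() on ring teardown.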
*/ + + /* XDP stuff */ + struct xdp_rxq_info xdp_rxq; }; /* A TX desc ring entry */ @@ -259,6 +268,9 @@ struct gve_tx_iovec { */ struct gve_tx_buffer_state { struct sk_buff *skb; /* skb for this pkt */ + struct { + u16 size; /* size of xmitted xdp pkt */ + } xdp; union { struct gve_tx_iovec iov[GVE_TX_MAX_IOVEC]; /* segments of this pkt */ struct { @@ -526,9 +538,11 @@ struct gve_priv { u16 rx_data_slot_cnt; /* rx buffer length */ u64 max_registered_pages; u64 num_registered_pages; /* num pages registered with NIC */ + struct bpf_prog *xdp_prog; /* XDP BPF program */ u32 rx_copybreak; /* copy packets smaller than this */ u16 default_num_queues; /* default num queues to set up */ + u16 num_xdp_queues; struct gve_queue_config tx_cfg; struct gve_queue_config rx_cfg; struct gve_qpl_config qpl_cfg; /* map used QPL ids */ @@ -785,7 +799,17 @@ static inline u32 gve_num_tx_qpls(struct gve_priv *priv) if (priv->queue_format != GVE_GQI_QPL_FORMAT) return 0; - return priv->tx_cfg.num_queues; + return priv->tx_cfg.num_queues + priv->num_xdp_queues; +} + +/* Returns the number of XDP tx queue page lists + */ +static inline u32 gve_num_xdp_qpls(struct gve_priv *priv) +{ + if (priv->queue_format != GVE_GQI_QPL_FORMAT) + return 0; + + return priv->num_xdp_queues; } /* Returns the number of rx queue page lists @@ -874,7 +898,17 @@ static inline bool gve_is_gqi(struct gve_priv *priv) static inline u32 gve_num_tx_queues(struct gve_priv *priv) { - return priv->tx_cfg.num_queues; + return priv->tx_cfg.num_queues + priv->num_xdp_queues; +} + +static inline u32 gve_xdp_tx_queue_id(struct gve_priv *priv, u32 queue_id) +{ + return priv->tx_cfg.num_queues + queue_id; +} + +static inline u32 gve_xdp_tx_start_queue_id(struct gve_priv *priv) +{ + return gve_xdp_tx_queue_id(priv, 0); } /* buffers */ @@ -885,7 +919,11 @@ void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, enum dma_data_direction); /* tx handling */ netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev); +int gve_xdp_xmit_one(struct gve_priv *priv, struct gve_tx_ring *tx, + void *data, int len); +void gve_xdp_tx_flush(struct gve_priv *priv, u32 xdp_qid); bool gve_tx_poll(struct gve_notify_block *block, int budget); +bool gve_xdp_poll(struct gve_notify_block *block, int budget); int gve_tx_alloc_rings(struct gve_priv *priv, int start_id, int num_rings); void gve_tx_free_rings_gqi(struct gve_priv *priv, int start_id, int num_rings); u32 gve_tx_load_event_counter(struct gve_priv *priv, diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index 5b6e31812fae2..067b393ccf9d7 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -34,6 +34,11 @@ static u32 gve_get_msglevel(struct net_device *netdev) return priv->msg_enable; } +/* For the following stats column string names, make sure the order + * matches how it is filled in the code. For xdp_aborted, xdp_drop, + * xdp_pass, xdp_tx, xdp_redirect, make sure it also matches the order + * as declared in enum xdp_action inside file uapi/linux/bpf.h . 
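 * Concretely (an assumed sketch of the counting side, using the per-ring
 * xdp_actions[] array sized GVE_XDP_ACTIONS that this patch adds): the
 * XDP verdict is used directly as the index into that array, which is
 * what ties the string order that follows to enum xdp_action
 * (XDP_ABORTED, XDP_DROP, XDP_PASS, XDP_TX, XDP_REDIRECT):
 *
 *	u32 act = bpf_prog_run_xdp(xdp_prog, &xdp);
 *
 *	if (act < GVE_XDP_ACTIONS)
 *		rx->xdp_actions[act]++;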
+ */ static const char gve_gstrings_main_stats[][ETH_GSTRING_LEN] = { "rx_packets", "tx_packets", "rx_bytes", "tx_bytes", "rx_dropped", "tx_dropped", "tx_timeouts", @@ -49,6 +54,9 @@ static const char gve_gstrings_rx_stats[][ETH_GSTRING_LEN] = { "rx_dropped_pkt[%u]", "rx_copybreak_pkt[%u]", "rx_copied_pkt[%u]", "rx_queue_drop_cnt[%u]", "rx_no_buffers_posted[%u]", "rx_drops_packet_over_mru[%u]", "rx_drops_invalid_checksum[%u]", + "rx_xdp_aborted[%u]", "rx_xdp_drop[%u]", "rx_xdp_pass[%u]", + "rx_xdp_tx[%u]", "rx_xdp_redirect[%u]", + "rx_xdp_tx_errors[%u]", "rx_xdp_redirect_errors[%u]", }; static const char gve_gstrings_tx_stats[][ETH_GSTRING_LEN] = { @@ -289,14 +297,25 @@ gve_get_ethtool_stats(struct net_device *netdev, if (skip_nic_stats) { /* skip NIC rx stats */ i += NIC_RX_STATS_REPORT_NUM; - continue; - } - for (j = 0; j < NIC_RX_STATS_REPORT_NUM; j++) { - u64 value = - be64_to_cpu(report_stats[rx_qid_to_stats_idx[ring] + j].value); + } else { + stats_idx = rx_qid_to_stats_idx[ring]; + for (j = 0; j < NIC_RX_STATS_REPORT_NUM; j++) { + u64 value = + be64_to_cpu(report_stats[stats_idx + j].value); - data[i++] = value; + data[i++] = value; + } } + /* XDP rx counters */ + do { + start = u64_stats_fetch_begin(&priv->rx[ring].statss); + for (j = 0; j < GVE_XDP_ACTIONS; j++) + data[i + j] = rx->xdp_actions[j]; + data[i + j++] = rx->xdp_tx_errors; + data[i + j++] = rx->xdp_redirect_errors; + } while (u64_stats_fetch_retry(&priv->rx[ring].statss, + start)); + i += GVE_XDP_ACTIONS + 2; /* XDP rx counters */ } } else { i += priv->rx_cfg.num_queues * NUM_GVE_RX_CNTS; @@ -418,6 +437,12 @@ static int gve_set_channels(struct net_device *netdev, if (!new_rx || !new_tx) return -EINVAL; + if (priv->num_xdp_queues && + (new_tx != new_rx || (2 * new_tx > priv->tx_cfg.max_queues))) { + dev_err(&priv->pdev->dev, "XDP load failed: The number of configured RX queues should be equal to the number of configured TX queues and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues"); + return -EINVAL; + } + if (!netif_carrier_ok(netdev)) { priv->tx_cfg.num_queues = new_tx; priv->rx_cfg.num_queues = new_rx; diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 160ca77c2751f..f493988579210 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -4,8 +4,10 @@ * Copyright (C) 2015-2021 Google, Inc. 
*/ +#include #include #include +#include #include #include #include @@ -247,8 +249,13 @@ static int gve_napi_poll(struct napi_struct *napi, int budget) block = container_of(napi, struct gve_notify_block, napi); priv = block->priv; - if (block->tx) - reschedule |= gve_tx_poll(block, budget); + if (block->tx) { + if (block->tx->q_num < priv->tx_cfg.num_queues) + reschedule |= gve_tx_poll(block, budget); + else + reschedule |= gve_xdp_poll(block, budget); + } + if (block->rx) { work_done = gve_rx_poll(block, budget); reschedule |= work_done == budget; @@ -582,6 +589,28 @@ static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx) netif_napi_del(&block->napi); } +static int gve_register_xdp_qpls(struct gve_priv *priv) +{ + int start_id; + int err; + int i; + + start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv)); + for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { + err = gve_adminq_register_page_list(priv, &priv->qpls[i]); + if (err) { + netif_err(priv, drv, priv->dev, + "failed to register queue page list %d\n", + priv->qpls[i].id); + /* This failure will trigger a reset - no need to clean + * up + */ + return err; + } + } + return 0; +} + static int gve_register_qpls(struct gve_priv *priv) { int start_id; @@ -618,6 +647,26 @@ static int gve_register_qpls(struct gve_priv *priv) return 0; } +static int gve_unregister_xdp_qpls(struct gve_priv *priv) +{ + int start_id; + int err; + int i; + + start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv)); + for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { + err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id); + /* This failure will trigger a reset - no need to clean up */ + if (err) { + netif_err(priv, drv, priv->dev, + "Failed to unregister queue page list %d\n", + priv->qpls[i].id); + return err; + } + } + return 0; +} + static int gve_unregister_qpls(struct gve_priv *priv) { int start_id; @@ -650,6 +699,27 @@ static int gve_unregister_qpls(struct gve_priv *priv) return 0; } +static int gve_create_xdp_rings(struct gve_priv *priv) +{ + int err; + + err = gve_adminq_create_tx_queues(priv, + gve_xdp_tx_start_queue_id(priv), + priv->num_xdp_queues); + if (err) { + netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n", + priv->num_xdp_queues); + /* This failure will trigger a reset - no need to clean + * up + */ + return err; + } + netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n", + priv->num_xdp_queues); + + return 0; +} + static int gve_create_rings(struct gve_priv *priv) { int num_tx_queues = gve_num_tx_queues(priv); @@ -699,6 +769,23 @@ static int gve_create_rings(struct gve_priv *priv) return 0; } +static void add_napi_init_xdp_sync_stats(struct gve_priv *priv, + int (*napi_poll)(struct napi_struct *napi, + int budget)) +{ + int start_id = gve_xdp_tx_start_queue_id(priv); + int i; + + /* Add xdp tx napi & init sync stats*/ + for (i = start_id; i < start_id + priv->num_xdp_queues; i++) { + int ntfy_idx = gve_tx_idx_to_ntfy(priv, i); + + u64_stats_init(&priv->tx[i].statss); + priv->tx[i].ntfy_id = ntfy_idx; + gve_add_napi(priv, ntfy_idx, napi_poll); + } +} + static void add_napi_init_sync_stats(struct gve_priv *priv, int (*napi_poll)(struct napi_struct *napi, int budget)) @@ -732,6 +819,23 @@ static void gve_tx_free_rings(struct gve_priv *priv, int start_id, int num_rings } } +static int gve_alloc_xdp_rings(struct gve_priv *priv) +{ + int start_id; + int err = 0; + + if (!priv->num_xdp_queues) + return 0; + + start_id = gve_xdp_tx_start_queue_id(priv); + err 
= gve_tx_alloc_rings(priv, start_id, priv->num_xdp_queues); + if (err) + return err; + add_napi_init_xdp_sync_stats(priv, gve_napi_poll); + + return 0; +} + static int gve_alloc_rings(struct gve_priv *priv) { int err; @@ -782,6 +886,26 @@ free_tx: return err; } +static int gve_destroy_xdp_rings(struct gve_priv *priv) +{ + int start_id; + int err; + + start_id = gve_xdp_tx_start_queue_id(priv); + err = gve_adminq_destroy_tx_queues(priv, + start_id, + priv->num_xdp_queues); + if (err) { + netif_err(priv, drv, priv->dev, + "failed to destroy XDP queues\n"); + /* This failure will trigger a reset - no need to clean up */ + return err; + } + netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n"); + + return 0; +} + static int gve_destroy_rings(struct gve_priv *priv) { int num_tx_queues = gve_num_tx_queues(priv); @@ -814,6 +938,21 @@ static void gve_rx_free_rings(struct gve_priv *priv) gve_rx_free_rings_dqo(priv); } +static void gve_free_xdp_rings(struct gve_priv *priv) +{ + int ntfy_idx, start_id; + int i; + + start_id = gve_xdp_tx_start_queue_id(priv); + if (priv->tx) { + for (i = start_id; i < start_id + priv->num_xdp_queues; i++) { + ntfy_idx = gve_tx_idx_to_ntfy(priv, i); + gve_remove_napi(priv, ntfy_idx); + } + gve_tx_free_rings(priv, start_id, priv->num_xdp_queues); + } +} + static void gve_free_rings(struct gve_priv *priv) { int num_tx_queues = gve_num_tx_queues(priv); @@ -929,6 +1068,28 @@ free_pages: priv->num_registered_pages -= qpl->num_entries; } +static int gve_alloc_xdp_qpls(struct gve_priv *priv) +{ + int start_id; + int i, j; + int err; + + start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv)); + for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { + err = gve_alloc_queue_page_list(priv, i, + priv->tx_pages_per_qpl); + if (err) + goto free_qpls; + } + + return 0; + +free_qpls: + for (j = start_id; j <= i; j++) + gve_free_queue_page_list(priv, j); + return err; +} + static int gve_alloc_qpls(struct gve_priv *priv) { int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues; @@ -978,6 +1139,16 @@ free_qpls: return err; } +static void gve_free_xdp_qpls(struct gve_priv *priv) +{ + int start_id; + int i; + + start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv)); + for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) + gve_free_queue_page_list(priv, i); +} + static void gve_free_qpls(struct gve_priv *priv) { int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues; @@ -1011,11 +1182,64 @@ static int gve_reset_recovery(struct gve_priv *priv, bool was_up); static void gve_turndown(struct gve_priv *priv); static void gve_turnup(struct gve_priv *priv); +static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev) +{ + struct napi_struct *napi; + struct gve_rx_ring *rx; + int err = 0; + int i, j; + + if (!priv->num_xdp_queues) + return 0; + + for (i = 0; i < priv->rx_cfg.num_queues; i++) { + rx = &priv->rx[i]; + napi = &priv->ntfy_blocks[rx->ntfy_id].napi; + + err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i, + napi->napi_id); + if (err) + goto err; + err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, + MEM_TYPE_PAGE_SHARED, NULL); + if (err) + goto err; + } + return 0; + +err: + for (j = i; j >= 0; j--) { + rx = &priv->rx[j]; + if (xdp_rxq_info_is_reg(&rx->xdp_rxq)) + xdp_rxq_info_unreg(&rx->xdp_rxq); + } + return err; +} + +static void gve_unreg_xdp_info(struct gve_priv *priv) +{ + int i; + + if (!priv->num_xdp_queues) + return; + + for (i = 0; i < priv->rx_cfg.num_queues; i++) { + struct gve_rx_ring *rx = &priv->rx[i]; 
+ + xdp_rxq_info_unreg(&rx->xdp_rxq); + } +} + static int gve_open(struct net_device *dev) { struct gve_priv *priv = netdev_priv(dev); int err; + if (priv->xdp_prog) + priv->num_xdp_queues = priv->rx_cfg.num_queues; + else + priv->num_xdp_queues = 0; + err = gve_alloc_qpls(priv); if (err) return err; @@ -1031,6 +1255,10 @@ static int gve_open(struct net_device *dev) if (err) goto free_rings; + err = gve_reg_xdp_info(priv, dev); + if (err) + goto free_rings; + err = gve_register_qpls(priv); if (err) goto reset; @@ -1095,6 +1323,7 @@ static int gve_close(struct net_device *dev) } del_timer_sync(&priv->stats_report_timer); + gve_unreg_xdp_info(priv); gve_free_rings(priv); gve_free_qpls(priv); priv->interface_down_cnt++; @@ -1111,6 +1340,167 @@ err: return gve_reset_recovery(priv, false); } +static int gve_remove_xdp_queues(struct gve_priv *priv) +{ + int err; + + err = gve_destroy_xdp_rings(priv); + if (err) + return err; + + err = gve_unregister_xdp_qpls(priv); + if (err) + return err; + + gve_unreg_xdp_info(priv); + gve_free_xdp_rings(priv); + gve_free_xdp_qpls(priv); + priv->num_xdp_queues = 0; + return 0; +} + +static int gve_add_xdp_queues(struct gve_priv *priv) +{ + int err; + + priv->num_xdp_queues = priv->tx_cfg.num_queues; + + err = gve_alloc_xdp_qpls(priv); + if (err) + goto err; + + err = gve_alloc_xdp_rings(priv); + if (err) + goto free_xdp_qpls; + + err = gve_reg_xdp_info(priv, priv->dev); + if (err) + goto free_xdp_rings; + + err = gve_register_xdp_qpls(priv); + if (err) + goto free_xdp_rings; + + err = gve_create_xdp_rings(priv); + if (err) + goto free_xdp_rings; + + return 0; + +free_xdp_rings: + gve_free_xdp_rings(priv); +free_xdp_qpls: + gve_free_xdp_qpls(priv); +err: + priv->num_xdp_queues = 0; + return err; +} + +static void gve_handle_link_status(struct gve_priv *priv, bool link_status) +{ + if (!gve_get_napi_enabled(priv)) + return; + + if (link_status == netif_carrier_ok(priv->dev)) + return; + + if (link_status) { + netdev_info(priv->dev, "Device link is up.\n"); + netif_carrier_on(priv->dev); + } else { + netdev_info(priv->dev, "Device link is down.\n"); + netif_carrier_off(priv->dev); + } +} + +static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog, + struct netlink_ext_ack *extack) +{ + struct bpf_prog *old_prog; + int err = 0; + u32 status; + + old_prog = READ_ONCE(priv->xdp_prog); + if (!netif_carrier_ok(priv->dev)) { + WRITE_ONCE(priv->xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + return 0; + } + + gve_turndown(priv); + if (!old_prog && prog) { + // Allocate XDP TX queues if an XDP program is + // being installed + err = gve_add_xdp_queues(priv); + if (err) + goto out; + } else if (old_prog && !prog) { + // Remove XDP TX queues if an XDP program is + // being uninstalled + err = gve_remove_xdp_queues(priv); + if (err) + goto out; + } + WRITE_ONCE(priv->xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + +out: + gve_turnup(priv); + status = ioread32be(&priv->reg_bar0->device_status); + gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); + return err; +} + +static int verify_xdp_configuration(struct net_device *dev) +{ + struct gve_priv *priv = netdev_priv(dev); + + if (dev->features & NETIF_F_LRO) { + netdev_warn(dev, "XDP is not supported when LRO is on.\n"); + return -EOPNOTSUPP; + } + + if (priv->queue_format != GVE_GQI_QPL_FORMAT) { + netdev_warn(dev, "XDP is not supported in mode %d.\n", + priv->queue_format); + return -EOPNOTSUPP; + } + + if (dev->mtu > (PAGE_SIZE / 2) - sizeof(struct ethhdr) - 
GVE_RX_PAD) { + netdev_warn(dev, "XDP is not supported for mtu %d.\n", + dev->mtu); + return -EOPNOTSUPP; + } + + if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues || + (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) { + netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d", + priv->rx_cfg.num_queues, + priv->tx_cfg.num_queues, + priv->tx_cfg.max_queues); + return -EINVAL; + } + return 0; +} + +static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp) +{ + struct gve_priv *priv = netdev_priv(dev); + int err; + + err = verify_xdp_configuration(dev); + if (err) + return err; + switch (xdp->command) { + case XDP_SETUP_PROG: + return gve_set_xdp(priv, xdp->prog, xdp->extack); + default: + return -EINVAL; + } +} + int gve_adjust_queues(struct gve_priv *priv, struct gve_queue_config new_rx_config, struct gve_queue_config new_tx_config) @@ -1305,6 +1695,7 @@ static const struct net_device_ops gve_netdev_ops = { .ndo_get_stats64 = gve_get_stats, .ndo_tx_timeout = gve_tx_timeout, .ndo_set_features = gve_set_features, + .ndo_bpf = gve_xdp, }; static void gve_handle_status(struct gve_priv *priv, u32 status) @@ -1411,23 +1802,6 @@ void gve_handle_report_stats(struct gve_priv *priv) } } -static void gve_handle_link_status(struct gve_priv *priv, bool link_status) -{ - if (!gve_get_napi_enabled(priv)) - return; - - if (link_status == netif_carrier_ok(priv->dev)) - return; - - if (link_status) { - netdev_info(priv->dev, "Device link is up.\n"); - netif_carrier_on(priv->dev); - } else { - netdev_info(priv->dev, "Device link is down.\n"); - netif_carrier_off(priv->dev); - } -} - /* Handle NIC status register changes, reset requests and report stats */ static void gve_service_task(struct work_struct *work) { @@ -1441,6 +1815,15 @@ static void gve_service_task(struct work_struct *work) gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); } +static void gve_set_netdev_xdp_features(struct gve_priv *priv) +{ + if (priv->queue_format == GVE_GQI_QPL_FORMAT) { + priv->dev->xdp_features = NETDEV_XDP_ACT_BASIC; + } else { + priv->dev->xdp_features = 0; + } +} + static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) { int num_ntfy; @@ -1519,6 +1902,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) } setup_device: + gve_set_netdev_xdp_features(priv); err = gve_setup_device_resources(priv); if (!err) return 0; diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index 051a15e4f1afd..3241f6ea29be5 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -8,6 +8,8 @@ #include "gve_adminq.h" #include "gve_utils.h" #include +#include +#include static void gve_rx_free_buffer(struct device *dev, struct gve_rx_slot_page_info *page_info, @@ -591,6 +593,43 @@ static struct sk_buff *gve_rx_skb(struct gve_priv *priv, struct gve_rx_ring *rx, return skb; } +static void gve_xdp_done(struct gve_priv *priv, struct gve_rx_ring *rx, + struct xdp_buff *xdp, struct bpf_prog *xprog, + int xdp_act) +{ + struct gve_tx_ring *tx; + int tx_qid; + int err; + + switch (xdp_act) { + case XDP_ABORTED: + case XDP_DROP: + default: + break; + case XDP_TX: + tx_qid = gve_xdp_tx_queue_id(priv, rx->q_num); + tx = &priv->tx[tx_qid]; + err = gve_xdp_xmit_one(priv, tx, xdp->data, + 
xdp->data_end - xdp->data); + + if (unlikely(err)) { + u64_stats_update_begin(&rx->statss); + rx->xdp_tx_errors++; + u64_stats_update_end(&rx->statss); + } + break; + case XDP_REDIRECT: + u64_stats_update_begin(&rx->statss); + rx->xdp_redirect_errors++; + u64_stats_update_end(&rx->statss); + break; + } + u64_stats_update_begin(&rx->statss); + if ((u32)xdp_act < GVE_XDP_ACTIONS) + rx->xdp_actions[xdp_act]++; + u64_stats_update_end(&rx->statss); +} + #define GVE_PKTCONT_BIT_IS_SET(x) (GVE_RXF_PKT_CONT & (x)) static void gve_rx(struct gve_rx_ring *rx, netdev_features_t feat, struct gve_rx_desc *desc, u32 idx, @@ -603,9 +642,12 @@ static void gve_rx(struct gve_rx_ring *rx, netdev_features_t feat, union gve_rx_data_slot *data_slot; struct gve_priv *priv = rx->gve; struct sk_buff *skb = NULL; + struct bpf_prog *xprog; + struct xdp_buff xdp; dma_addr_t page_bus; void *va; + u16 len = frag_size; struct napi_struct *napi = &priv->ntfy_blocks[rx->ntfy_id].napi; bool is_first_frag = ctx->frag_cnt == 0; @@ -645,9 +687,35 @@ static void gve_rx(struct gve_rx_ring *rx, netdev_features_t feat, dma_sync_single_for_cpu(&priv->pdev->dev, page_bus, PAGE_SIZE, DMA_FROM_DEVICE); page_info->pad = is_first_frag ? GVE_RX_PAD : 0; + len -= page_info->pad; frag_size -= page_info->pad; - skb = gve_rx_skb(priv, rx, page_info, napi, frag_size, + xprog = READ_ONCE(priv->xdp_prog); + if (xprog && is_only_frag) { + void *old_data; + int xdp_act; + + xdp_init_buff(&xdp, rx->packet_buffer_size, &rx->xdp_rxq); + xdp_prepare_buff(&xdp, page_info->page_address + + page_info->page_offset, GVE_RX_PAD, + len, false); + old_data = xdp.data; + xdp_act = bpf_prog_run_xdp(xprog, &xdp); + if (xdp_act != XDP_PASS) { + gve_xdp_done(priv, rx, &xdp, xprog, xdp_act); + ctx->total_size += frag_size; + goto finish_ok_pkt; + } + + page_info->pad += xdp.data - old_data; + len = xdp.data_end - xdp.data; + + u64_stats_update_begin(&rx->statss); + rx->xdp_actions[XDP_PASS]++; + u64_stats_update_end(&rx->statss); + } + + skb = gve_rx_skb(priv, rx, page_info, napi, len, data_slot, is_only_frag); if (!skb) { u64_stats_update_begin(&rx->statss); @@ -773,6 +841,7 @@ static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx) static int gve_clean_rx_done(struct gve_rx_ring *rx, int budget, netdev_features_t feat) { + u64 xdp_txs = rx->xdp_actions[XDP_TX]; struct gve_rx_ctx *ctx = &rx->ctx; struct gve_priv *priv = rx->gve; struct gve_rx_cnts cnts = {0}; @@ -820,6 +889,9 @@ static int gve_clean_rx_done(struct gve_rx_ring *rx, int budget, u64_stats_update_end(&rx->statss); } + if (xdp_txs != rx->xdp_actions[XDP_TX]) + gve_xdp_tx_flush(priv, rx->q_num); + /* restock ring slots */ if (!rx->data.raw_addressing) { /* In QPL mode buffs are refilled as the desc are processed */ diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c index e24e73e74e339..3e96ee7537ce4 100644 --- a/drivers/net/ethernet/google/gve/gve_tx.c +++ b/drivers/net/ethernet/google/gve/gve_tx.c @@ -19,6 +19,14 @@ static inline void gve_tx_put_doorbell(struct gve_priv *priv, iowrite32be(val, &priv->db_bar2[be32_to_cpu(q_resources->db_index)]); } +void gve_xdp_tx_flush(struct gve_priv *priv, u32 xdp_qid) +{ + u32 tx_qid = gve_xdp_tx_queue_id(priv, xdp_qid); + struct gve_tx_ring *tx = &priv->tx[tx_qid]; + + gve_tx_put_doorbell(priv, tx->q_resources, tx->req); +} + /* gvnic can only transmit from a Registered Segment. * We copy skb payloads into the registered segment before writing Tx * descriptors and ringing the Tx doorbell. 
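Aside from the patch itself: the gve_tx.c changes above follow a common bounce-buffer TX pattern for GQI-QPL — copy the frame into the pre-registered FIFO, post a descriptor per frame, and write the doorbell once per batch instead of once per packet. The standalone sketch below illustrates only that flow; all names are hypothetical and none of them are gve driver symbols.

/* Toy, userspace-only illustration of the bounce-buffer TX idea:
 * frames are copied into a "registered" FIFO, one descriptor is
 * counted per frame, and the doorbell is written once per batch.
 * Hypothetical names; not driver code. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define FIFO_SIZE 4096

struct toy_tx_ring {
	uint8_t fifo[FIFO_SIZE];  /* stands in for the registered bounce buffer */
	uint32_t head;            /* next free byte in the FIFO */
	uint32_t req;             /* descriptors posted so far */
	uint32_t doorbell;        /* last value "written" to the doorbell */
};

/* Copy one frame into the FIFO and account for one posted descriptor. */
static int toy_xmit_one(struct toy_tx_ring *tx, const void *data, uint32_t len)
{
	if (tx->head + len > FIFO_SIZE)
		return -1;        /* no FIFO space: caller would retry later */
	memcpy(tx->fifo + tx->head, data, len);
	tx->head += len;
	tx->req++;                /* one descriptor per frame in this toy model */
	return 0;
}

/* Ring the doorbell once for everything posted since the last flush. */
static void toy_tx_flush(struct toy_tx_ring *tx)
{
	tx->doorbell = tx->req;   /* a real driver would iowrite32be() here */
}

int main(void)
{
	struct toy_tx_ring tx = {0};
	const char *frames[] = { "frame-a", "frame-b", "frame-c" };
	int i;

	for (i = 0; i < 3; i++)
		toy_xmit_one(&tx, frames[i], (uint32_t)strlen(frames[i]) + 1);
	toy_tx_flush(&tx);        /* single doorbell write for the whole batch */

	printf("posted=%u doorbell=%u fifo_used=%u\n",
	       (unsigned)tx.req, (unsigned)tx.doorbell, (unsigned)tx.head);
	return 0;
}

The real driver additionally tracks per-fragment iovecs, padding, and completion counters; the sketch only captures the batching of the doorbell write that gve_xdp_tx_flush() performs after the NAPI loop.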
@@ -132,6 +140,50 @@ static void gve_tx_free_fifo(struct gve_tx_fifo *fifo, size_t bytes) atomic_add(bytes, &fifo->available); } +static size_t gve_tx_clear_buffer_state(struct gve_tx_buffer_state *info) +{ + size_t space_freed = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(info->iov); i++) { + space_freed += info->iov[i].iov_len + info->iov[i].iov_padding; + info->iov[i].iov_len = 0; + info->iov[i].iov_padding = 0; + } + return space_freed; +} + +static int gve_clean_xdp_done(struct gve_priv *priv, struct gve_tx_ring *tx, + u32 to_do) +{ + struct gve_tx_buffer_state *info; + u32 clean_end = tx->done + to_do; + u64 pkts = 0, bytes = 0; + size_t space_freed = 0; + u32 idx; + + for (; tx->done < clean_end; tx->done++) { + idx = tx->done & tx->mask; + info = &tx->info[idx]; + + if (unlikely(!info->xdp.size)) + continue; + + bytes += info->xdp.size; + pkts++; + + info->xdp.size = 0; + space_freed += gve_tx_clear_buffer_state(info); + } + + gve_tx_free_fifo(&tx->tx_fifo, space_freed); + u64_stats_update_begin(&tx->statss); + tx->bytes_done += bytes; + tx->pkt_done += pkts; + u64_stats_update_end(&tx->statss); + return pkts; +} + static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx, u32 to_do, bool try_to_wake); @@ -144,8 +196,12 @@ static void gve_tx_free_ring(struct gve_priv *priv, int idx) gve_tx_remove_from_block(priv, idx); slots = tx->mask + 1; - gve_clean_tx_done(priv, tx, priv->tx_desc_cnt, false); - netdev_tx_reset_queue(tx->netdev_txq); + if (tx->q_num < priv->tx_cfg.num_queues) { + gve_clean_tx_done(priv, tx, priv->tx_desc_cnt, false); + netdev_tx_reset_queue(tx->netdev_txq); + } else { + gve_clean_xdp_done(priv, tx, priv->tx_desc_cnt); + } dma_free_coherent(hdev, sizeof(*tx->q_resources), tx->q_resources, tx->q_resources_bus); @@ -213,7 +269,8 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx) netif_dbg(priv, drv, priv->dev, "tx[%d]->bus=%lx\n", idx, (unsigned long)tx->bus); - tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx); + if (idx < priv->tx_cfg.num_queues) + tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx); gve_tx_add_to_block(priv, idx); return 0; @@ -657,6 +714,65 @@ netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } +static int gve_tx_fill_xdp(struct gve_priv *priv, struct gve_tx_ring *tx, + void *data, int len) +{ + int pad, nfrags, ndescs, iovi, offset; + struct gve_tx_buffer_state *info; + u32 reqi = tx->req; + + pad = gve_tx_fifo_pad_alloc_one_frag(&tx->tx_fifo, len); + if (pad >= GVE_TX_MAX_HEADER_SIZE) + pad = 0; + info = &tx->info[reqi & tx->mask]; + info->xdp.size = len; + + nfrags = gve_tx_alloc_fifo(&tx->tx_fifo, pad + len, + &info->iov[0]); + iovi = pad > 0; + ndescs = nfrags - iovi; + offset = 0; + + while (iovi < nfrags) { + if (!offset) + gve_tx_fill_pkt_desc(&tx->desc[reqi & tx->mask], 0, + CHECKSUM_NONE, false, 0, ndescs, + info->iov[iovi].iov_len, + info->iov[iovi].iov_offset, len); + else + gve_tx_fill_seg_desc(&tx->desc[reqi & tx->mask], + 0, 0, false, false, + info->iov[iovi].iov_len, + info->iov[iovi].iov_offset); + + memcpy(tx->tx_fifo.base + info->iov[iovi].iov_offset, + data + offset, info->iov[iovi].iov_len); + gve_dma_sync_for_device(&priv->pdev->dev, + tx->tx_fifo.qpl->page_buses, + info->iov[iovi].iov_offset, + info->iov[iovi].iov_len); + offset += info->iov[iovi].iov_len; + iovi++; + reqi++; + } + + return ndescs; +} + +int gve_xdp_xmit_one(struct gve_priv *priv, struct gve_tx_ring *tx, + void *data, int len) +{ + int nsegs; + + if (!gve_can_tx(tx, len + 
GVE_TX_MAX_HEADER_SIZE - 1)) + return -EBUSY; + + nsegs = gve_tx_fill_xdp(priv, tx, data, len); + tx->req += nsegs; + + return 0; +} + #define GVE_TX_START_THRESH PAGE_SIZE static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx, @@ -666,8 +782,8 @@ static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx, u64 pkts = 0, bytes = 0; size_t space_freed = 0; struct sk_buff *skb; - int i, j; u32 idx; + int j; for (j = 0; j < to_do; j++) { idx = tx->done & tx->mask; @@ -689,12 +805,7 @@ static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx, dev_consume_skb_any(skb); if (tx->raw_addressing) continue; - /* FIFO free */ - for (i = 0; i < ARRAY_SIZE(info->iov); i++) { - space_freed += info->iov[i].iov_len + info->iov[i].iov_padding; - info->iov[i].iov_len = 0; - info->iov[i].iov_padding = 0; - } + space_freed += gve_tx_clear_buffer_state(info); } } @@ -729,6 +840,24 @@ u32 gve_tx_load_event_counter(struct gve_priv *priv, return be32_to_cpu(counter); } +bool gve_xdp_poll(struct gve_notify_block *block, int budget) +{ + struct gve_priv *priv = block->priv; + struct gve_tx_ring *tx = block->tx; + u32 nic_done; + u32 to_do; + + /* If budget is 0, do all the work */ + if (budget == 0) + budget = INT_MAX; + + /* Find out how much work there is to be done */ + nic_done = gve_tx_load_event_counter(priv, tx); + to_do = min_t(u32, (nic_done - tx->done), budget); + gve_clean_xdp_done(priv, tx, to_do); + return nic_done != tx->done; +} + bool gve_tx_poll(struct gve_notify_block *block, int budget) { struct gve_priv *priv = block->priv; -- GitLab From 39a7f4aa3e4a7947614cf1d5c27abba3300adb1e Mon Sep 17 00:00:00 2001 From: Praveen Kaligineedi Date: Wed, 15 Mar 2023 16:33:11 -0700 Subject: [PATCH 0548/2078] gve: Add XDP REDIRECT support for GQI-QPL format This patch contains the following changes: 1) Support for XDP REDIRECT action on rx 2) ndo_xdp_xmit callback support In GQI-QPL queue format, the driver needs to allocate a fixed size memory, the size specified by vNIC device, for RX/TX and register this memory as a bounce buffer with the vNIC device when a queue is created. The number of pages in the bounce buffer is limited and the pages need to be made available to the vNIC by copying the RX data out to prevent head-of-line blocking. The XDP_REDIRECT packets are therefore immediately copied to a newly allocated page. Signed-off-by: Praveen Kaligineedi Reviewed-by: Jeroen de Borst Signed-off-by: David S. 
Miller --- drivers/net/ethernet/google/gve/gve.h | 15 +++++- drivers/net/ethernet/google/gve/gve_ethtool.c | 26 ++++++---- drivers/net/ethernet/google/gve/gve_main.c | 19 ++++++++ drivers/net/ethernet/google/gve/gve_rx.c | 47 ++++++++++++++++-- drivers/net/ethernet/google/gve/gve_tx.c | 48 +++++++++++++++++-- 5 files changed, 138 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 8d5234d4ba67b..a3b2aec2c575a 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -236,6 +236,7 @@ struct gve_rx_ring { u64 rx_frag_alloc_cnt; /* free-running count of rx page allocations */ u64 xdp_tx_errors; u64 xdp_redirect_errors; + u64 xdp_alloc_fails; u64 xdp_actions[GVE_XDP_ACTIONS]; u32 q_num; /* queue index */ u32 ntfy_id; /* notification block index */ @@ -247,6 +248,7 @@ struct gve_rx_ring { /* XDP stuff */ struct xdp_rxq_info xdp_rxq; + struct page_frag_cache page_cache; /* Page cache to allocate XDP frames */ }; /* A TX desc ring entry */ @@ -267,7 +269,10 @@ struct gve_tx_iovec { * ring entry but only used for a pkt_desc not a seg_desc */ struct gve_tx_buffer_state { - struct sk_buff *skb; /* skb for this pkt */ + union { + struct sk_buff *skb; /* skb for this pkt */ + struct xdp_frame *xdp_frame; /* xdp_frame */ + }; struct { u16 size; /* size of xmitted xdp pkt */ } xdp; @@ -385,6 +390,8 @@ struct gve_tx_ring { struct { /* Spinlock for when cleanup in progress */ spinlock_t clean_lock; + /* Spinlock for XDP tx traffic */ + spinlock_t xdp_lock; }; /* DQO fields. */ @@ -462,6 +469,8 @@ struct gve_tx_ring { dma_addr_t q_resources_bus; /* dma address of the queue resources */ dma_addr_t complq_bus_dqo; /* dma address of the dqo.compl_ring */ struct u64_stats_sync statss; /* sync stats for 32bit archs */ + u64 xdp_xmit; + u64 xdp_xmit_errors; } ____cacheline_aligned; /* Wraps the info for one irq including the napi struct and the queues @@ -919,8 +928,10 @@ void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, enum dma_data_direction); /* tx handling */ netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev); +int gve_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, + u32 flags); int gve_xdp_xmit_one(struct gve_priv *priv, struct gve_tx_ring *tx, - void *data, int len); + void *data, int len, void *frame_p); void gve_xdp_tx_flush(struct gve_priv *priv, u32 xdp_qid); bool gve_tx_poll(struct gve_notify_block *block, int budget); bool gve_xdp_poll(struct gve_notify_block *block, int budget); diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index 067b393ccf9d7..23db0f3534a84 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -56,13 +56,14 @@ static const char gve_gstrings_rx_stats[][ETH_GSTRING_LEN] = { "rx_drops_packet_over_mru[%u]", "rx_drops_invalid_checksum[%u]", "rx_xdp_aborted[%u]", "rx_xdp_drop[%u]", "rx_xdp_pass[%u]", "rx_xdp_tx[%u]", "rx_xdp_redirect[%u]", - "rx_xdp_tx_errors[%u]", "rx_xdp_redirect_errors[%u]", + "rx_xdp_tx_errors[%u]", "rx_xdp_redirect_errors[%u]", "rx_xdp_alloc_fails[%u]", }; static const char gve_gstrings_tx_stats[][ETH_GSTRING_LEN] = { "tx_posted_desc[%u]", "tx_completed_desc[%u]", "tx_consumed_desc[%u]", "tx_bytes[%u]", "tx_wake[%u]", "tx_stop[%u]", "tx_event_counter[%u]", "tx_dma_mapping_error[%u]", + "tx_xdp_xmit[%u]", "tx_xdp_xmit_errors[%u]" }; static const char 
gve_gstrings_adminq_stats[][ETH_GSTRING_LEN] = { @@ -313,9 +314,10 @@ gve_get_ethtool_stats(struct net_device *netdev, data[i + j] = rx->xdp_actions[j]; data[i + j++] = rx->xdp_tx_errors; data[i + j++] = rx->xdp_redirect_errors; + data[i + j++] = rx->xdp_alloc_fails; } while (u64_stats_fetch_retry(&priv->rx[ring].statss, start)); - i += GVE_XDP_ACTIONS + 2; /* XDP rx counters */ + i += GVE_XDP_ACTIONS + 3; /* XDP rx counters */ } } else { i += priv->rx_cfg.num_queues * NUM_GVE_RX_CNTS; @@ -371,13 +373,21 @@ gve_get_ethtool_stats(struct net_device *netdev, if (skip_nic_stats) { /* skip NIC tx stats */ i += NIC_TX_STATS_REPORT_NUM; - continue; - } - for (j = 0; j < NIC_TX_STATS_REPORT_NUM; j++) { - u64 value = - be64_to_cpu(report_stats[tx_qid_to_stats_idx[ring] + j].value); - data[i++] = value; + } else { + stats_idx = tx_qid_to_stats_idx[ring]; + for (j = 0; j < NIC_TX_STATS_REPORT_NUM; j++) { + u64 value = + be64_to_cpu(report_stats[stats_idx + j].value); + data[i++] = value; + } } + do { + start = u64_stats_fetch_begin(&priv->tx[ring].statss); + data[i] = tx->xdp_xmit; + data[i + 1] = tx->xdp_xmit_errors; + } while (u64_stats_fetch_retry(&priv->tx[ring].statss, + start)); + i += 2; /* XDP tx counters */ } } else { i += num_tx_queues * NUM_GVE_TX_CNTS; diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index f493988579210..2e8ea4dd71e89 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1230,6 +1230,21 @@ static void gve_unreg_xdp_info(struct gve_priv *priv) } } +static void gve_drain_page_cache(struct gve_priv *priv) +{ + struct page_frag_cache *nc; + int i; + + for (i = 0; i < priv->rx_cfg.num_queues; i++) { + nc = &priv->rx[i].page_cache; + if (nc->va) { + __page_frag_cache_drain(virt_to_page(nc->va), + nc->pagecnt_bias); + nc->va = NULL; + } + } +} + static int gve_open(struct net_device *dev) { struct gve_priv *priv = netdev_priv(dev); @@ -1313,6 +1328,7 @@ static int gve_close(struct net_device *dev) netif_carrier_off(dev); if (gve_get_device_rings_ok(priv)) { gve_turndown(priv); + gve_drain_page_cache(priv); err = gve_destroy_rings(priv); if (err) goto err; @@ -1696,6 +1712,7 @@ static const struct net_device_ops gve_netdev_ops = { .ndo_tx_timeout = gve_tx_timeout, .ndo_set_features = gve_set_features, .ndo_bpf = gve_xdp, + .ndo_xdp_xmit = gve_xdp_xmit, }; static void gve_handle_status(struct gve_priv *priv, u32 status) @@ -1819,6 +1836,8 @@ static void gve_set_netdev_xdp_features(struct gve_priv *priv) { if (priv->queue_format == GVE_GQI_QPL_FORMAT) { priv->dev->xdp_features = NETDEV_XDP_ACT_BASIC; + priv->dev->xdp_features |= NETDEV_XDP_ACT_REDIRECT; + priv->dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT; } else { priv->dev->xdp_features = 0; } diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index 3241f6ea29be5..ed4b5a540e6d6 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -593,6 +593,35 @@ static struct sk_buff *gve_rx_skb(struct gve_priv *priv, struct gve_rx_ring *rx, return skb; } +static int gve_xdp_redirect(struct net_device *dev, struct gve_rx_ring *rx, + struct xdp_buff *orig, struct bpf_prog *xdp_prog) +{ + int total_len, len = orig->data_end - orig->data; + int headroom = XDP_PACKET_HEADROOM; + struct xdp_buff new; + void *frame; + int err; + + total_len = headroom + SKB_DATA_ALIGN(len) + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + frame = 
page_frag_alloc(&rx->page_cache, total_len, GFP_ATOMIC); + if (!frame) { + u64_stats_update_begin(&rx->statss); + rx->xdp_alloc_fails++; + u64_stats_update_end(&rx->statss); + return -ENOMEM; + } + xdp_init_buff(&new, total_len, &rx->xdp_rxq); + xdp_prepare_buff(&new, frame, headroom, len, false); + memcpy(new.data, orig->data, len); + + err = xdp_do_redirect(dev, &new, xdp_prog); + if (err) + page_frag_free(frame); + + return err; +} + static void gve_xdp_done(struct gve_priv *priv, struct gve_rx_ring *rx, struct xdp_buff *xdp, struct bpf_prog *xprog, int xdp_act) @@ -609,8 +638,10 @@ static void gve_xdp_done(struct gve_priv *priv, struct gve_rx_ring *rx, case XDP_TX: tx_qid = gve_xdp_tx_queue_id(priv, rx->q_num); tx = &priv->tx[tx_qid]; + spin_lock(&tx->xdp_lock); err = gve_xdp_xmit_one(priv, tx, xdp->data, - xdp->data_end - xdp->data); + xdp->data_end - xdp->data, NULL); + spin_unlock(&tx->xdp_lock); if (unlikely(err)) { u64_stats_update_begin(&rx->statss); @@ -619,9 +650,13 @@ static void gve_xdp_done(struct gve_priv *priv, struct gve_rx_ring *rx, } break; case XDP_REDIRECT: - u64_stats_update_begin(&rx->statss); - rx->xdp_redirect_errors++; - u64_stats_update_end(&rx->statss); + err = gve_xdp_redirect(priv->dev, rx, xdp, xprog); + + if (unlikely(err)) { + u64_stats_update_begin(&rx->statss); + rx->xdp_redirect_errors++; + u64_stats_update_end(&rx->statss); + } break; } u64_stats_update_begin(&rx->statss); @@ -841,6 +876,7 @@ static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx) static int gve_clean_rx_done(struct gve_rx_ring *rx, int budget, netdev_features_t feat) { + u64 xdp_redirects = rx->xdp_actions[XDP_REDIRECT]; u64 xdp_txs = rx->xdp_actions[XDP_TX]; struct gve_rx_ctx *ctx = &rx->ctx; struct gve_priv *priv = rx->gve; @@ -892,6 +928,9 @@ static int gve_clean_rx_done(struct gve_rx_ring *rx, int budget, if (xdp_txs != rx->xdp_actions[XDP_TX]) gve_xdp_tx_flush(priv, rx->q_num); + if (xdp_redirects != rx->xdp_actions[XDP_REDIRECT]) + xdp_do_flush(); + /* restock ring slots */ if (!rx->data.raw_addressing) { /* In QPL mode buffs are refilled as the desc are processed */ diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c index 3e96ee7537ce4..d928c3c796186 100644 --- a/drivers/net/ethernet/google/gve/gve_tx.c +++ b/drivers/net/ethernet/google/gve/gve_tx.c @@ -173,6 +173,10 @@ static int gve_clean_xdp_done(struct gve_priv *priv, struct gve_tx_ring *tx, pkts++; info->xdp.size = 0; + if (info->xdp_frame) { + xdp_return_frame(info->xdp_frame); + info->xdp_frame = NULL; + } space_freed += gve_tx_clear_buffer_state(info); } @@ -233,6 +237,7 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx) /* Make sure everything is zeroed to start */ memset(tx, 0, sizeof(*tx)); spin_lock_init(&tx->clean_lock); + spin_lock_init(&tx->xdp_lock); tx->q_num = idx; tx->mask = slots - 1; @@ -715,7 +720,7 @@ netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev) } static int gve_tx_fill_xdp(struct gve_priv *priv, struct gve_tx_ring *tx, - void *data, int len) + void *data, int len, void *frame_p) { int pad, nfrags, ndescs, iovi, offset; struct gve_tx_buffer_state *info; @@ -725,6 +730,7 @@ static int gve_tx_fill_xdp(struct gve_priv *priv, struct gve_tx_ring *tx, if (pad >= GVE_TX_MAX_HEADER_SIZE) pad = 0; info = &tx->info[reqi & tx->mask]; + info->xdp_frame = frame_p; info->xdp.size = len; nfrags = gve_tx_alloc_fifo(&tx->tx_fifo, pad + len, @@ -759,15 +765,51 @@ static int gve_tx_fill_xdp(struct gve_priv *priv, struct 
gve_tx_ring *tx, return ndescs; } +int gve_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, + u32 flags) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_tx_ring *tx; + int i, err = 0, qid; + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) + return -EINVAL; + + qid = gve_xdp_tx_queue_id(priv, + smp_processor_id() % priv->num_xdp_queues); + + tx = &priv->tx[qid]; + + spin_lock(&tx->xdp_lock); + for (i = 0; i < n; i++) { + err = gve_xdp_xmit_one(priv, tx, frames[i]->data, + frames[i]->len, frames[i]); + if (err) + break; + } + + if (flags & XDP_XMIT_FLUSH) + gve_tx_put_doorbell(priv, tx->q_resources, tx->req); + + spin_unlock(&tx->xdp_lock); + + u64_stats_update_begin(&tx->statss); + tx->xdp_xmit += n; + tx->xdp_xmit_errors += n - i; + u64_stats_update_end(&tx->statss); + + return i ? i : err; +} + int gve_xdp_xmit_one(struct gve_priv *priv, struct gve_tx_ring *tx, - void *data, int len) + void *data, int len, void *frame_p) { int nsegs; if (!gve_can_tx(tx, len + GVE_TX_MAX_HEADER_SIZE - 1)) return -EBUSY; - nsegs = gve_tx_fill_xdp(priv, tx, data, len); + nsegs = gve_tx_fill_xdp(priv, tx, data, len, frame_p); tx->req += nsegs; return 0; -- GitLab From fd8e40321a12391e6f554cc637d0c4b6109682a9 Mon Sep 17 00:00:00 2001 From: Praveen Kaligineedi Date: Wed, 15 Mar 2023 16:33:12 -0700 Subject: [PATCH 0549/2078] gve: Add AF_XDP zero-copy support for GQI-QPL format Adding AF_XDP zero-copy support. Note: Although these changes support AF_XDP socket in zero-copy mode, there is still a copy happening within the driver between XSK buffer pool and QPL bounce buffers in GQI-QPL format. In GQI-QPL queue format, the driver needs to allocate a fixed size memory, the size specified by vNIC device, for RX/TX and register this memory as a bounce buffer with the vNIC device when a queue is created. The number of pages in the bounce buffer is limited and the pages need to be made available to the vNIC by copying the RX data out to prevent head-of-line blocking. Therefore, we cannot pass the XSK buffer pool to the vNIC. The number of copies on RX path from the bounce buffer to XSK buffer is 2 for AF_XDP copy mode (bounce buffer -> allocated page frag -> XSK buffer) and 1 for AF_XDP zero-copy mode (bounce buffer -> XSK buffer). This patch contains the following changes: 1) Enable and disable XSK buffer pool 2) Copy XDP packets from QPL bounce buffers to XSK buffer on rx 3) Copy XDP packets from XSK buffer to QPL bounce buffers and ring the doorbell as part of XDP TX napi poll 4) ndo_xsk_wakeup callback support Signed-off-by: Praveen Kaligineedi Reviewed-by: Jeroen de Borst Signed-off-by: David S. 
Miller --- drivers/net/ethernet/google/gve/gve.h | 7 + drivers/net/ethernet/google/gve/gve_ethtool.c | 14 +- drivers/net/ethernet/google/gve/gve_main.c | 174 +++++++++++++++++- drivers/net/ethernet/google/gve/gve_rx.c | 30 +++ drivers/net/ethernet/google/gve/gve_tx.c | 58 +++++- 5 files changed, 274 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index a3b2aec2c575a..e214b51d3c8bb 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -248,6 +248,8 @@ struct gve_rx_ring { /* XDP stuff */ struct xdp_rxq_info xdp_rxq; + struct xdp_rxq_info xsk_rxq; + struct xsk_buff_pool *xsk_pool; struct page_frag_cache page_cache; /* Page cache to allocate XDP frames */ }; @@ -275,6 +277,7 @@ struct gve_tx_buffer_state { }; struct { u16 size; /* size of xmitted xdp pkt */ + u8 is_xsk; /* xsk buff */ } xdp; union { struct gve_tx_iovec iov[GVE_TX_MAX_IOVEC]; /* segments of this pkt */ @@ -469,6 +472,10 @@ struct gve_tx_ring { dma_addr_t q_resources_bus; /* dma address of the queue resources */ dma_addr_t complq_bus_dqo; /* dma address of the dqo.compl_ring */ struct u64_stats_sync statss; /* sync stats for 32bit archs */ + struct xsk_buff_pool *xsk_pool; + u32 xdp_xsk_wakeup; + u32 xdp_xsk_done; + u64 xdp_xsk_sent; u64 xdp_xmit; u64 xdp_xmit_errors; } ____cacheline_aligned; diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index 23db0f3534a84..b18804e934d36 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -62,8 +62,8 @@ static const char gve_gstrings_rx_stats[][ETH_GSTRING_LEN] = { static const char gve_gstrings_tx_stats[][ETH_GSTRING_LEN] = { "tx_posted_desc[%u]", "tx_completed_desc[%u]", "tx_consumed_desc[%u]", "tx_bytes[%u]", "tx_wake[%u]", "tx_stop[%u]", "tx_event_counter[%u]", - "tx_dma_mapping_error[%u]", - "tx_xdp_xmit[%u]", "tx_xdp_xmit_errors[%u]" + "tx_dma_mapping_error[%u]", "tx_xsk_wakeup[%u]", + "tx_xsk_done[%u]", "tx_xsk_sent[%u]", "tx_xdp_xmit[%u]", "tx_xdp_xmit_errors[%u]" }; static const char gve_gstrings_adminq_stats[][ETH_GSTRING_LEN] = { @@ -381,13 +381,17 @@ gve_get_ethtool_stats(struct net_device *netdev, data[i++] = value; } } + /* XDP xsk counters */ + data[i++] = tx->xdp_xsk_wakeup; + data[i++] = tx->xdp_xsk_done; do { start = u64_stats_fetch_begin(&priv->tx[ring].statss); - data[i] = tx->xdp_xmit; - data[i + 1] = tx->xdp_xmit_errors; + data[i] = tx->xdp_xsk_sent; + data[i + 1] = tx->xdp_xmit; + data[i + 2] = tx->xdp_xmit_errors; } while (u64_stats_fetch_retry(&priv->tx[ring].statss, start)); - i += 2; /* XDP tx counters */ + i += 3; /* XDP tx counters */ } } else { i += num_tx_queues * NUM_GVE_TX_CNTS; diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 2e8ea4dd71e89..57ce74315eba4 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "gve.h" #include "gve_dqo.h" #include "gve_adminq.h" @@ -1188,6 +1189,7 @@ static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev) struct gve_rx_ring *rx; int err = 0; int i, j; + u32 tx_qid; if (!priv->num_xdp_queues) return 0; @@ -1204,6 +1206,24 @@ static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev) MEM_TYPE_PAGE_SHARED, NULL); if (err) goto err; + rx->xsk_pool = xsk_get_pool_from_qid(dev, i); + if 
(rx->xsk_pool) { + err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i, + napi->napi_id); + if (err) + goto err; + err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, + MEM_TYPE_XSK_BUFF_POOL, NULL); + if (err) + goto err; + xsk_pool_set_rxq_info(rx->xsk_pool, + &rx->xsk_rxq); + } + } + + for (i = 0; i < priv->num_xdp_queues; i++) { + tx_qid = gve_xdp_tx_queue_id(priv, i); + priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i); } return 0; @@ -1212,13 +1232,15 @@ err: rx = &priv->rx[j]; if (xdp_rxq_info_is_reg(&rx->xdp_rxq)) xdp_rxq_info_unreg(&rx->xdp_rxq); + if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) + xdp_rxq_info_unreg(&rx->xsk_rxq); } return err; } static void gve_unreg_xdp_info(struct gve_priv *priv) { - int i; + int i, tx_qid; if (!priv->num_xdp_queues) return; @@ -1227,6 +1249,15 @@ static void gve_unreg_xdp_info(struct gve_priv *priv) struct gve_rx_ring *rx = &priv->rx[i]; xdp_rxq_info_unreg(&rx->xdp_rxq); + if (rx->xsk_pool) { + xdp_rxq_info_unreg(&rx->xsk_rxq); + rx->xsk_pool = NULL; + } + } + + for (i = 0; i < priv->num_xdp_queues; i++) { + tx_qid = gve_xdp_tx_queue_id(priv, i); + priv->tx[tx_qid].xsk_pool = NULL; } } @@ -1469,6 +1500,140 @@ out: return err; } +static int gve_xsk_pool_enable(struct net_device *dev, + struct xsk_buff_pool *pool, + u16 qid) +{ + struct gve_priv *priv = netdev_priv(dev); + struct napi_struct *napi; + struct gve_rx_ring *rx; + int tx_qid; + int err; + + if (qid >= priv->rx_cfg.num_queues) { + dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid); + return -EINVAL; + } + if (xsk_pool_get_rx_frame_size(pool) < + priv->dev->max_mtu + sizeof(struct ethhdr)) { + dev_err(&priv->pdev->dev, "xsk pool frame_len too small"); + return -EINVAL; + } + + err = xsk_pool_dma_map(pool, &priv->pdev->dev, + DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); + if (err) + return err; + + /* If XDP prog is not installed, return */ + if (!priv->xdp_prog) + return 0; + + rx = &priv->rx[qid]; + napi = &priv->ntfy_blocks[rx->ntfy_id].napi; + err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id); + if (err) + goto err; + + err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, + MEM_TYPE_XSK_BUFF_POOL, NULL); + if (err) + goto err; + + xsk_pool_set_rxq_info(pool, &rx->xsk_rxq); + rx->xsk_pool = pool; + + tx_qid = gve_xdp_tx_queue_id(priv, qid); + priv->tx[tx_qid].xsk_pool = pool; + + return 0; +err: + if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) + xdp_rxq_info_unreg(&rx->xsk_rxq); + + xsk_pool_dma_unmap(pool, + DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); + return err; +} + +static int gve_xsk_pool_disable(struct net_device *dev, + u16 qid) +{ + struct gve_priv *priv = netdev_priv(dev); + struct napi_struct *napi_rx; + struct napi_struct *napi_tx; + struct xsk_buff_pool *pool; + int tx_qid; + + pool = xsk_get_pool_from_qid(dev, qid); + if (!pool) + return -EINVAL; + if (qid >= priv->rx_cfg.num_queues) + return -EINVAL; + + /* If XDP prog is not installed, unmap DMA and return */ + if (!priv->xdp_prog) + goto done; + + tx_qid = gve_xdp_tx_queue_id(priv, qid); + if (!netif_running(dev)) { + priv->rx[qid].xsk_pool = NULL; + xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq); + priv->tx[tx_qid].xsk_pool = NULL; + goto done; + } + + napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi; + napi_disable(napi_rx); /* make sure current rx poll is done */ + + napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi; + napi_disable(napi_tx); /* make sure current tx poll is done */ + + priv->rx[qid].xsk_pool = NULL; + xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq); + priv->tx[tx_qid].xsk_pool = NULL; + 
smp_mb(); /* Make sure it is visible to the workers on datapath */ + + napi_enable(napi_rx); + if (gve_rx_work_pending(&priv->rx[qid])) + napi_schedule(napi_rx); + + napi_enable(napi_tx); + if (gve_tx_clean_pending(priv, &priv->tx[tx_qid])) + napi_schedule(napi_tx); + +done: + xsk_pool_dma_unmap(pool, + DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); + return 0; +} + +static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) +{ + struct gve_priv *priv = netdev_priv(dev); + int tx_queue_id = gve_xdp_tx_queue_id(priv, queue_id); + + if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog) + return -EINVAL; + + if (flags & XDP_WAKEUP_TX) { + struct gve_tx_ring *tx = &priv->tx[tx_queue_id]; + struct napi_struct *napi = + &priv->ntfy_blocks[tx->ntfy_id].napi; + + if (!napi_if_scheduled_mark_missed(napi)) { + /* Call local_bh_enable to trigger SoftIRQ processing */ + local_bh_disable(); + napi_schedule(napi); + local_bh_enable(); + } + + tx->xdp_xsk_wakeup++; + } + + return 0; +} + static int verify_xdp_configuration(struct net_device *dev) { struct gve_priv *priv = netdev_priv(dev); @@ -1512,6 +1677,11 @@ static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp) switch (xdp->command) { case XDP_SETUP_PROG: return gve_set_xdp(priv, xdp->prog, xdp->extack); + case XDP_SETUP_XSK_POOL: + if (xdp->xsk.pool) + return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id); + else + return gve_xsk_pool_disable(dev, xdp->xsk.queue_id); default: return -EINVAL; } @@ -1713,6 +1883,7 @@ static const struct net_device_ops gve_netdev_ops = { .ndo_set_features = gve_set_features, .ndo_bpf = gve_xdp, .ndo_xdp_xmit = gve_xdp_xmit, + .ndo_xsk_wakeup = gve_xsk_wakeup, }; static void gve_handle_status(struct gve_priv *priv, u32 status) @@ -1838,6 +2009,7 @@ static void gve_set_netdev_xdp_features(struct gve_priv *priv) priv->dev->xdp_features = NETDEV_XDP_ACT_BASIC; priv->dev->xdp_features |= NETDEV_XDP_ACT_REDIRECT; priv->dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT; + priv->dev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY; } else { priv->dev->xdp_features = 0; } diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index ed4b5a540e6d6..d1da7413dc4de 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -10,6 +10,7 @@ #include #include #include +#include static void gve_rx_free_buffer(struct device *dev, struct gve_rx_slot_page_info *page_info, @@ -593,6 +594,31 @@ static struct sk_buff *gve_rx_skb(struct gve_priv *priv, struct gve_rx_ring *rx, return skb; } +static int gve_xsk_pool_redirect(struct net_device *dev, + struct gve_rx_ring *rx, + void *data, int len, + struct bpf_prog *xdp_prog) +{ + struct xdp_buff *xdp; + int err; + + if (rx->xsk_pool->frame_len < len) + return -E2BIG; + xdp = xsk_buff_alloc(rx->xsk_pool); + if (!xdp) { + u64_stats_update_begin(&rx->statss); + rx->xdp_alloc_fails++; + u64_stats_update_end(&rx->statss); + return -ENOMEM; + } + xdp->data_end = xdp->data + len; + memcpy(xdp->data, data, len); + err = xdp_do_redirect(dev, xdp, xdp_prog); + if (err) + xsk_buff_free(xdp); + return err; +} + static int gve_xdp_redirect(struct net_device *dev, struct gve_rx_ring *rx, struct xdp_buff *orig, struct bpf_prog *xdp_prog) { @@ -602,6 +628,10 @@ static int gve_xdp_redirect(struct net_device *dev, struct gve_rx_ring *rx, void *frame; int err; + if (rx->xsk_pool) + return gve_xsk_pool_redirect(dev, rx, orig->data, + len, xdp_prog); + total_len = headroom + 
SKB_DATA_ALIGN(len) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); frame = page_frag_alloc(&rx->page_cache, total_len, GFP_ATOMIC); diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c index d928c3c796186..e50510b8e7846 100644 --- a/drivers/net/ethernet/google/gve/gve_tx.c +++ b/drivers/net/ethernet/google/gve/gve_tx.c @@ -11,6 +11,7 @@ #include #include #include +#include static inline void gve_tx_put_doorbell(struct gve_priv *priv, struct gve_queue_resources *q_resources, @@ -160,6 +161,7 @@ static int gve_clean_xdp_done(struct gve_priv *priv, struct gve_tx_ring *tx, u32 clean_end = tx->done + to_do; u64 pkts = 0, bytes = 0; size_t space_freed = 0; + u32 xsk_complete = 0; u32 idx; for (; tx->done < clean_end; tx->done++) { @@ -171,6 +173,7 @@ static int gve_clean_xdp_done(struct gve_priv *priv, struct gve_tx_ring *tx, bytes += info->xdp.size; pkts++; + xsk_complete += info->xdp.is_xsk; info->xdp.size = 0; if (info->xdp_frame) { @@ -181,6 +184,8 @@ static int gve_clean_xdp_done(struct gve_priv *priv, struct gve_tx_ring *tx, } gve_tx_free_fifo(&tx->tx_fifo, space_freed); + if (xsk_complete > 0 && tx->xsk_pool) + xsk_tx_completed(tx->xsk_pool, xsk_complete); u64_stats_update_begin(&tx->statss); tx->bytes_done += bytes; tx->pkt_done += pkts; @@ -720,7 +725,7 @@ netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev) } static int gve_tx_fill_xdp(struct gve_priv *priv, struct gve_tx_ring *tx, - void *data, int len, void *frame_p) + void *data, int len, void *frame_p, bool is_xsk) { int pad, nfrags, ndescs, iovi, offset; struct gve_tx_buffer_state *info; @@ -732,6 +737,7 @@ static int gve_tx_fill_xdp(struct gve_priv *priv, struct gve_tx_ring *tx, info = &tx->info[reqi & tx->mask]; info->xdp_frame = frame_p; info->xdp.size = len; + info->xdp.is_xsk = is_xsk; nfrags = gve_tx_alloc_fifo(&tx->tx_fifo, pad + len, &info->iov[0]); @@ -809,7 +815,7 @@ int gve_xdp_xmit_one(struct gve_priv *priv, struct gve_tx_ring *tx, if (!gve_can_tx(tx, len + GVE_TX_MAX_HEADER_SIZE - 1)) return -EBUSY; - nsegs = gve_tx_fill_xdp(priv, tx, data, len, frame_p); + nsegs = gve_tx_fill_xdp(priv, tx, data, len, frame_p, false); tx->req += nsegs; return 0; @@ -882,11 +888,43 @@ u32 gve_tx_load_event_counter(struct gve_priv *priv, return be32_to_cpu(counter); } +static int gve_xsk_tx(struct gve_priv *priv, struct gve_tx_ring *tx, + int budget) +{ + struct xdp_desc desc; + int sent = 0, nsegs; + void *data; + + spin_lock(&tx->xdp_lock); + while (sent < budget) { + if (!gve_can_tx(tx, GVE_TX_START_THRESH)) + goto out; + + if (!xsk_tx_peek_desc(tx->xsk_pool, &desc)) { + tx->xdp_xsk_done = tx->xdp_xsk_wakeup; + goto out; + } + + data = xsk_buff_raw_get_data(tx->xsk_pool, desc.addr); + nsegs = gve_tx_fill_xdp(priv, tx, data, desc.len, NULL, true); + tx->req += nsegs; + sent++; + } +out: + if (sent > 0) { + gve_tx_put_doorbell(priv, tx->q_resources, tx->req); + xsk_tx_release(tx->xsk_pool); + } + spin_unlock(&tx->xdp_lock); + return sent; +} + bool gve_xdp_poll(struct gve_notify_block *block, int budget) { struct gve_priv *priv = block->priv; struct gve_tx_ring *tx = block->tx; u32 nic_done; + bool repoll; u32 to_do; /* If budget is 0, do all the work */ @@ -897,7 +935,21 @@ bool gve_xdp_poll(struct gve_notify_block *block, int budget) nic_done = gve_tx_load_event_counter(priv, tx); to_do = min_t(u32, (nic_done - tx->done), budget); gve_clean_xdp_done(priv, tx, to_do); - return nic_done != tx->done; + repoll = nic_done != tx->done; + + if (tx->xsk_pool) { + int sent = 
gve_xsk_tx(priv, tx, budget); + + u64_stats_update_begin(&tx->statss); + tx->xdp_xsk_sent += sent; + u64_stats_update_end(&tx->statss); + repoll |= (sent == budget); + if (xsk_uses_need_wakeup(tx->xsk_pool)) + xsk_set_tx_need_wakeup(tx->xsk_pool); + } + + /* If we still have work we want to repoll */ + return repoll; } bool gve_tx_poll(struct gve_notify_block *block, int budget) -- GitLab From b9d83ab8a708f23a4001d60e9d8d0b3be3d9f607 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Mar 2023 01:10:06 +0000 Subject: [PATCH 0550/2078] net/packet: annotate accesses to po->xmit po->xmit can be set from setsockopt(PACKET_QDISC_BYPASS), while read locklessly. Use READ_ONCE()/WRITE_ONCE() to avoid potential load/store tearing issues. Fixes: d346a3fae3ff ("packet: introduce PACKET_QDISC_BYPASS socket option") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/af_packet.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index d4e76e2ae153e..d25dd9f63cc4f 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -307,7 +307,8 @@ static void packet_cached_dev_reset(struct packet_sock *po) static bool packet_use_direct_xmit(const struct packet_sock *po) { - return po->xmit == packet_direct_xmit; + /* Paired with WRITE_ONCE() in packet_setsockopt() */ + return READ_ONCE(po->xmit) == packet_direct_xmit; } static u16 packet_pick_tx_queue(struct sk_buff *skb) @@ -2867,7 +2868,8 @@ tpacket_error: packet_inc_pending(&po->tx_ring); status = TP_STATUS_SEND_REQUEST; - err = po->xmit(skb); + /* Paired with WRITE_ONCE() in packet_setsockopt() */ + err = READ_ONCE(po->xmit)(skb); if (unlikely(err != 0)) { if (err > 0) err = net_xmit_errno(err); @@ -3070,7 +3072,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) virtio_net_hdr_set_proto(skb, &vnet_hdr); } - err = po->xmit(skb); + /* Paired with WRITE_ONCE() in packet_setsockopt() */ + err = READ_ONCE(po->xmit)(skb); if (unlikely(err != 0)) { if (err > 0) err = net_xmit_errno(err); @@ -4007,7 +4010,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; - po->xmit = val ? packet_direct_xmit : dev_queue_xmit; + /* Paired with all lockless reads of po->xmit */ + WRITE_ONCE(po->xmit, val ? packet_direct_xmit : dev_queue_xmit); return 0; } default: -- GitLab From ee5675ecdf7a4e713ed21d98a70c2871d6ebed01 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Mar 2023 01:10:07 +0000 Subject: [PATCH 0551/2078] net/packet: convert po->origdev to an atomic flag syzbot/KCAN reported that po->origdev can be read while another thread is changing its value. We can avoid this splat by converting this field to an actual bit. Following patches will convert remaining 1bit fields. Fixes: 80feaacb8a64 ("[AF_PACKET]: Add option to return orig_dev to userspace.") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. 
Miller --- net/packet/af_packet.c | 10 ++++------ net/packet/diag.c | 2 +- net/packet/internal.h | 22 +++++++++++++++++++++- 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index d25dd9f63cc4f..af7c44169b869 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2184,7 +2184,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, sll = &PACKET_SKB_CB(skb)->sa.ll; sll->sll_hatype = dev->type; sll->sll_pkttype = skb->pkt_type; - if (unlikely(po->origdev)) + if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV))) sll->sll_ifindex = orig_dev->ifindex; else sll->sll_ifindex = dev->ifindex; @@ -2461,7 +2461,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, sll->sll_hatype = dev->type; sll->sll_protocol = skb->protocol; sll->sll_pkttype = skb->pkt_type; - if (unlikely(po->origdev)) + if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV))) sll->sll_ifindex = orig_dev->ifindex; else sll->sll_ifindex = dev->ifindex; @@ -3914,9 +3914,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; - lock_sock(sk); - po->origdev = !!val; - release_sock(sk); + packet_sock_flag_set(po, PACKET_SOCK_ORIGDEV, val); return 0; } case PACKET_VNET_HDR: @@ -4065,7 +4063,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, val = po->auxdata; break; case PACKET_ORIGDEV: - val = po->origdev; + val = packet_sock_flag(po, PACKET_SOCK_ORIGDEV); break; case PACKET_VNET_HDR: val = po->has_vnet_hdr; diff --git a/net/packet/diag.c b/net/packet/diag.c index 07812ae5ca073..e1ac9bb375b31 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -25,7 +25,7 @@ static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb) pinfo.pdi_flags |= PDI_RUNNING; if (po->auxdata) pinfo.pdi_flags |= PDI_AUXDATA; - if (po->origdev) + if (packet_sock_flag(po, PACKET_SOCK_ORIGDEV)) pinfo.pdi_flags |= PDI_ORIGDEV; if (po->has_vnet_hdr) pinfo.pdi_flags |= PDI_VNETHDR; diff --git a/net/packet/internal.h b/net/packet/internal.h index 48af35b1aed25..178cd1852238d 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -116,9 +116,9 @@ struct packet_sock { int copy_thresh; spinlock_t bind_lock; struct mutex pg_vec_lock; + unsigned long flags; unsigned int running; /* bind_lock must be held */ unsigned int auxdata:1, /* writer must hold sock lock */ - origdev:1, has_vnet_hdr:1, tp_loss:1, tp_tx_has_off:1; @@ -144,4 +144,24 @@ static inline struct packet_sock *pkt_sk(struct sock *sk) return (struct packet_sock *)sk; } +enum packet_sock_flags { + PACKET_SOCK_ORIGDEV, +}; + +static inline void packet_sock_flag_set(struct packet_sock *po, + enum packet_sock_flags flag, + bool val) +{ + if (val) + set_bit(flag, &po->flags); + else + clear_bit(flag, &po->flags); +} + +static inline bool packet_sock_flag(const struct packet_sock *po, + enum packet_sock_flags flag) +{ + return test_bit(flag, &po->flags); +} + #endif -- GitLab From fd53c297aa7b077ae98a3d3d2d3aa278a1686ba6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Mar 2023 01:10:08 +0000 Subject: [PATCH 0552/2078] net/packet: convert po->auxdata to an atomic flag po->auxdata can be read while another thread is changing its value, potentially raising KCSAN splat. Convert it to PACKET_SOCK_AUXDATA flag. Fixes: 8dc419447415 ("[PACKET]: Add optional checksum computation for recvmsg") Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/packet/af_packet.c | 8 +++----- net/packet/diag.c | 2 +- net/packet/internal.h | 4 ++-- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index af7c44169b869..ecd9fc27e360c 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3514,7 +3514,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len); } - if (pkt_sk(sk)->auxdata) { + if (packet_sock_flag(pkt_sk(sk), PACKET_SOCK_AUXDATA)) { struct tpacket_auxdata aux; aux.tp_status = TP_STATUS_USER; @@ -3900,9 +3900,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; - lock_sock(sk); - po->auxdata = !!val; - release_sock(sk); + packet_sock_flag_set(po, PACKET_SOCK_AUXDATA, val); return 0; } case PACKET_ORIGDEV: @@ -4060,7 +4058,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, break; case PACKET_AUXDATA: - val = po->auxdata; + val = packet_sock_flag(po, PACKET_SOCK_AUXDATA); break; case PACKET_ORIGDEV: val = packet_sock_flag(po, PACKET_SOCK_ORIGDEV); diff --git a/net/packet/diag.c b/net/packet/diag.c index e1ac9bb375b31..d704c7bf51b20 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -23,7 +23,7 @@ static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb) pinfo.pdi_flags = 0; if (po->running) pinfo.pdi_flags |= PDI_RUNNING; - if (po->auxdata) + if (packet_sock_flag(po, PACKET_SOCK_AUXDATA)) pinfo.pdi_flags |= PDI_AUXDATA; if (packet_sock_flag(po, PACKET_SOCK_ORIGDEV)) pinfo.pdi_flags |= PDI_ORIGDEV; diff --git a/net/packet/internal.h b/net/packet/internal.h index 178cd1852238d..3bae8ea7a36f5 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -118,8 +118,7 @@ struct packet_sock { struct mutex pg_vec_lock; unsigned long flags; unsigned int running; /* bind_lock must be held */ - unsigned int auxdata:1, /* writer must hold sock lock */ - has_vnet_hdr:1, + unsigned int has_vnet_hdr:1, /* writer must hold sock lock */ tp_loss:1, tp_tx_has_off:1; int pressure; @@ -146,6 +145,7 @@ static inline struct packet_sock *pkt_sk(struct sock *sk) enum packet_sock_flags { PACKET_SOCK_ORIGDEV, + PACKET_SOCK_AUXDATA, }; static inline void packet_sock_flag_set(struct packet_sock *po, -- GitLab From 1051ce4ab64db91f7b62369ddc321ba8747f8c84 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Mar 2023 01:10:09 +0000 Subject: [PATCH 0553/2078] net/packet: annotate accesses to po->tp_tstamp tp_tstamp is read locklessly. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/af_packet.c | 9 +++++---- net/packet/diag.c | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ecd9fc27e360c..a27a811fa2b0d 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -474,7 +474,7 @@ static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame, struct timespec64 ts; __u32 ts_status; - if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp))) + if (!(ts_status = tpacket_get_timestamp(skb, &ts, READ_ONCE(po->tp_tstamp)))) return 0; h.raw = frame; @@ -2403,7 +2403,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, * closer to the time of capture. 
*/ ts_status = tpacket_get_timestamp(skb, &ts, - po->tp_tstamp | SOF_TIMESTAMPING_SOFTWARE); + READ_ONCE(po->tp_tstamp) | + SOF_TIMESTAMPING_SOFTWARE); if (!ts_status) ktime_get_real_ts64(&ts); @@ -3945,7 +3946,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; - po->tp_tstamp = val; + WRITE_ONCE(po->tp_tstamp, val); return 0; } case PACKET_FANOUT: @@ -4097,7 +4098,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, val = po->tp_loss; break; case PACKET_TIMESTAMP: - val = po->tp_tstamp; + val = READ_ONCE(po->tp_tstamp); break; case PACKET_FANOUT: val = (po->fanout ? diff --git a/net/packet/diag.c b/net/packet/diag.c index d704c7bf51b20..0abca32e2f878 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -18,7 +18,7 @@ static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb) pinfo.pdi_version = po->tp_version; pinfo.pdi_reserve = po->tp_reserve; pinfo.pdi_copy_thresh = po->copy_thresh; - pinfo.pdi_tstamp = po->tp_tstamp; + pinfo.pdi_tstamp = READ_ONCE(po->tp_tstamp); pinfo.pdi_flags = 0; if (po->running) -- GitLab From 7438344660fa55b33b8234c1797c886eb73667a7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Mar 2023 01:10:10 +0000 Subject: [PATCH 0554/2078] net/packet: convert po->tp_tx_has_off to an atomic flag This is to use existing space in po->flags, and reclaim the storage used by the non atomic bit fields. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/af_packet.c | 6 +++--- net/packet/internal.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index a27a811fa2b0d..7800dc622ff37 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2672,7 +2672,7 @@ static int tpacket_parse_header(struct packet_sock *po, void *frame, return -EMSGSIZE; } - if (unlikely(po->tp_tx_has_off)) { + if (unlikely(packet_sock_flag(po, PACKET_SOCK_TX_HAS_OFF))) { int off_min, off_max; off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll); @@ -3993,7 +3993,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, lock_sock(sk); if (!po->rx_ring.pg_vec && !po->tx_ring.pg_vec) - po->tp_tx_has_off = !!val; + packet_sock_flag_set(po, PACKET_SOCK_TX_HAS_OFF, val); release_sock(sk); return 0; @@ -4120,7 +4120,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, lv = sizeof(rstats); break; case PACKET_TX_HAS_OFF: - val = po->tp_tx_has_off; + val = packet_sock_flag(po, PACKET_SOCK_TX_HAS_OFF); break; case PACKET_QDISC_BYPASS: val = packet_use_direct_xmit(po); diff --git a/net/packet/internal.h b/net/packet/internal.h index 3bae8ea7a36f5..0d16a581e2713 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -119,8 +119,7 @@ struct packet_sock { unsigned long flags; unsigned int running; /* bind_lock must be held */ unsigned int has_vnet_hdr:1, /* writer must hold sock lock */ - tp_loss:1, - tp_tx_has_off:1; + tp_loss:1; int pressure; int ifindex; /* bound device */ __be16 num; @@ -146,6 +145,7 @@ static inline struct packet_sock *pkt_sk(struct sock *sk) enum packet_sock_flags { PACKET_SOCK_ORIGDEV, PACKET_SOCK_AUXDATA, + PACKET_SOCK_TX_HAS_OFF, }; static inline void packet_sock_flag_set(struct packet_sock *po, -- GitLab From 164bddace2e03f6005e650cb88f101a66ebdc05a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Mar 2023 01:10:11 +0000 Subject: [PATCH 0555/2078] net/packet: 
convert po->tp_loss to an atomic flag tp_loss can be read locklessly. Convert it to an atomic flag to avoid races. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/af_packet.c | 6 +++--- net/packet/diag.c | 2 +- net/packet/internal.h | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 7800dc622ff37..119063c8a1c59 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2843,7 +2843,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) if (unlikely(tp_len < 0)) { tpacket_error: - if (po->tp_loss) { + if (packet_sock_flag(po, PACKET_SOCK_TP_LOSS)) { __packet_set_status(po, ph, TP_STATUS_AVAILABLE); packet_increment_head(&po->tx_ring); @@ -3886,7 +3886,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { ret = -EBUSY; } else { - po->tp_loss = !!val; + packet_sock_flag_set(po, PACKET_SOCK_TP_LOSS, val); ret = 0; } release_sock(sk); @@ -4095,7 +4095,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, val = po->tp_reserve; break; case PACKET_LOSS: - val = po->tp_loss; + val = packet_sock_flag(po, PACKET_SOCK_TP_LOSS); break; case PACKET_TIMESTAMP: val = READ_ONCE(po->tp_tstamp); diff --git a/net/packet/diag.c b/net/packet/diag.c index 0abca32e2f878..8bb4ce6a8e617 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -29,7 +29,7 @@ static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb) pinfo.pdi_flags |= PDI_ORIGDEV; if (po->has_vnet_hdr) pinfo.pdi_flags |= PDI_VNETHDR; - if (po->tp_loss) + if (packet_sock_flag(po, PACKET_SOCK_TP_LOSS)) pinfo.pdi_flags |= PDI_LOSS; return nla_put(nlskb, PACKET_DIAG_INFO, sizeof(pinfo), &pinfo); diff --git a/net/packet/internal.h b/net/packet/internal.h index 0d16a581e2713..9d406a92ede8e 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -118,8 +118,7 @@ struct packet_sock { struct mutex pg_vec_lock; unsigned long flags; unsigned int running; /* bind_lock must be held */ - unsigned int has_vnet_hdr:1, /* writer must hold sock lock */ - tp_loss:1; + unsigned int has_vnet_hdr:1; /* writer must hold sock lock */ int pressure; int ifindex; /* bound device */ __be16 num; @@ -146,6 +145,7 @@ enum packet_sock_flags { PACKET_SOCK_ORIGDEV, PACKET_SOCK_AUXDATA, PACKET_SOCK_TX_HAS_OFF, + PACKET_SOCK_TP_LOSS, }; static inline void packet_sock_flag_set(struct packet_sock *po, -- GitLab From 50d935eafee292fc432d5ac8c8715a6492961abc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Mar 2023 01:10:12 +0000 Subject: [PATCH 0556/2078] net/packet: convert po->has_vnet_hdr to an atomic flag po->has_vnet_hdr can be read locklessly. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/af_packet.c | 19 ++++++++++--------- net/packet/diag.c | 2 +- net/packet/internal.h | 2 +- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 119063c8a1c59..5a6b05e17ca21 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2309,7 +2309,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, netoff = TPACKET_ALIGN(po->tp_hdrlen + (maclen < 16 ? 
16 : maclen)) + po->tp_reserve; - if (po->has_vnet_hdr) { + if (packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR)) { netoff += sizeof(struct virtio_net_hdr); do_vnet = true; } @@ -2780,7 +2780,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) size_max = po->tx_ring.frame_size - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); - if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !po->has_vnet_hdr) + if ((size_max > dev->mtu + reserve + VLAN_HLEN) && + !packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR)) size_max = dev->mtu + reserve + VLAN_HLEN; reinit_completion(&po->skb_completion); @@ -2809,7 +2810,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) status = TP_STATUS_SEND_REQUEST; hlen = LL_RESERVED_SPACE(dev); tlen = dev->needed_tailroom; - if (po->has_vnet_hdr) { + if (packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR)) { vnet_hdr = data; data += sizeof(*vnet_hdr); tp_len -= sizeof(*vnet_hdr); @@ -2837,7 +2838,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) addr, hlen, copylen, &sockc); if (likely(tp_len >= 0) && tp_len > dev->mtu + reserve && - !po->has_vnet_hdr && + !packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR) && !packet_extra_vlan_len_allowed(dev, skb)) tp_len = -EMSGSIZE; @@ -2856,7 +2857,7 @@ tpacket_error: } } - if (po->has_vnet_hdr) { + if (packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR)) { if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) { tp_len = -EINVAL; goto tpacket_error; @@ -2991,7 +2992,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (sock->type == SOCK_RAW) reserve = dev->hard_header_len; - if (po->has_vnet_hdr) { + if (packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR)) { err = packet_snd_vnet_parse(msg, &len, &vnet_hdr); if (err) goto out_unlock; @@ -3451,7 +3452,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, packet_rcv_try_clear_pressure(pkt_sk(sk)); - if (pkt_sk(sk)->has_vnet_hdr) { + if (packet_sock_flag(pkt_sk(sk), PACKET_SOCK_HAS_VNET_HDR)) { err = packet_rcv_vnet(msg, skb, &len); if (err) goto out_free; @@ -3931,7 +3932,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { ret = -EBUSY; } else { - po->has_vnet_hdr = !!val; + packet_sock_flag_set(po, PACKET_SOCK_HAS_VNET_HDR, val); ret = 0; } release_sock(sk); @@ -4065,7 +4066,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, val = packet_sock_flag(po, PACKET_SOCK_ORIGDEV); break; case PACKET_VNET_HDR: - val = po->has_vnet_hdr; + val = packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR); break; case PACKET_VERSION: val = po->tp_version; diff --git a/net/packet/diag.c b/net/packet/diag.c index 8bb4ce6a8e617..56240aaf032b2 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -27,7 +27,7 @@ static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb) pinfo.pdi_flags |= PDI_AUXDATA; if (packet_sock_flag(po, PACKET_SOCK_ORIGDEV)) pinfo.pdi_flags |= PDI_ORIGDEV; - if (po->has_vnet_hdr) + if (packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR)) pinfo.pdi_flags |= PDI_VNETHDR; if (packet_sock_flag(po, PACKET_SOCK_TP_LOSS)) pinfo.pdi_flags |= PDI_LOSS; diff --git a/net/packet/internal.h b/net/packet/internal.h index 9d406a92ede8e..2521176807f4f 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -118,7 +118,6 @@ struct packet_sock { struct mutex pg_vec_lock; unsigned long flags; unsigned int running; /* bind_lock must be held */ - unsigned int has_vnet_hdr:1; /* 
writer must hold sock lock */ int pressure; int ifindex; /* bound device */ __be16 num; @@ -146,6 +145,7 @@ enum packet_sock_flags { PACKET_SOCK_AUXDATA, PACKET_SOCK_TX_HAS_OFF, PACKET_SOCK_TP_LOSS, + PACKET_SOCK_HAS_VNET_HDR, }; static inline void packet_sock_flag_set(struct packet_sock *po, -- GitLab From 61edf479818e63978cabd243b82ca80f8948a313 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Mar 2023 01:10:13 +0000 Subject: [PATCH 0557/2078] net/packet: convert po->running to an atomic flag Instead of consuming 32 bits for po->running, use one available bit in po->flags. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/af_packet.c | 20 ++++++++++---------- net/packet/diag.c | 2 +- net/packet/internal.h | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 5a6b05e17ca21..ec446452bbe8d 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -340,14 +340,14 @@ static void __register_prot_hook(struct sock *sk) { struct packet_sock *po = pkt_sk(sk); - if (!po->running) { + if (!packet_sock_flag(po, PACKET_SOCK_RUNNING)) { if (po->fanout) __fanout_link(sk, po); else dev_add_pack(&po->prot_hook); sock_hold(sk); - po->running = 1; + packet_sock_flag_set(po, PACKET_SOCK_RUNNING, 1); } } @@ -369,7 +369,7 @@ static void __unregister_prot_hook(struct sock *sk, bool sync) lockdep_assert_held_once(&po->bind_lock); - po->running = 0; + packet_sock_flag_set(po, PACKET_SOCK_RUNNING, 0); if (po->fanout) __fanout_unlink(sk, po); @@ -389,7 +389,7 @@ static void unregister_prot_hook(struct sock *sk, bool sync) { struct packet_sock *po = pkt_sk(sk); - if (po->running) + if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) __unregister_prot_hook(sk, sync); } @@ -1782,7 +1782,7 @@ static int fanout_add(struct sock *sk, struct fanout_args *args) err = -EINVAL; spin_lock(&po->bind_lock); - if (po->running && + if (packet_sock_flag(po, PACKET_SOCK_RUNNING) && match->type == type && match->prot_hook.type == po->prot_hook.type && match->prot_hook.dev == po->prot_hook.dev) { @@ -3222,7 +3222,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, if (need_rehook) { dev_hold(dev); - if (po->running) { + if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) { rcu_read_unlock(); /* prevents packet_notifier() from calling * register_prot_hook() @@ -3235,7 +3235,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, dev->ifindex); } - BUG_ON(po->running); + BUG_ON(packet_sock_flag(po, PACKET_SOCK_RUNNING)); WRITE_ONCE(po->num, proto); po->prot_hook.type = proto; @@ -4159,7 +4159,7 @@ static int packet_notifier(struct notifier_block *this, case NETDEV_DOWN: if (dev->ifindex == po->ifindex) { spin_lock(&po->bind_lock); - if (po->running) { + if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) { __unregister_prot_hook(sk, false); sk->sk_err = ENETDOWN; if (!sock_flag(sk, SOCK_DEAD)) @@ -4470,7 +4470,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, /* Detach socket from network */ spin_lock(&po->bind_lock); - was_running = po->running; + was_running = packet_sock_flag(po, PACKET_SOCK_RUNNING); num = po->num; if (was_running) { WRITE_ONCE(po->num, 0); @@ -4681,7 +4681,7 @@ static int packet_seq_show(struct seq_file *seq, void *v) s->sk_type, ntohs(READ_ONCE(po->num)), READ_ONCE(po->ifindex), - po->running, + packet_sock_flag(po, PACKET_SOCK_RUNNING), atomic_read(&s->sk_rmem_alloc), from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)), sock_i_ino(s)); diff 
--git a/net/packet/diag.c b/net/packet/diag.c index 56240aaf032b2..de4ced5cf3e8c 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -21,7 +21,7 @@ static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb) pinfo.pdi_tstamp = READ_ONCE(po->tp_tstamp); pinfo.pdi_flags = 0; - if (po->running) + if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) pinfo.pdi_flags |= PDI_RUNNING; if (packet_sock_flag(po, PACKET_SOCK_AUXDATA)) pinfo.pdi_flags |= PDI_AUXDATA; diff --git a/net/packet/internal.h b/net/packet/internal.h index 2521176807f4f..58f042c631723 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -117,7 +117,6 @@ struct packet_sock { spinlock_t bind_lock; struct mutex pg_vec_lock; unsigned long flags; - unsigned int running; /* bind_lock must be held */ int pressure; int ifindex; /* bound device */ __be16 num; @@ -146,6 +145,7 @@ enum packet_sock_flags { PACKET_SOCK_TX_HAS_OFF, PACKET_SOCK_TP_LOSS, PACKET_SOCK_HAS_VNET_HDR, + PACKET_SOCK_RUNNING, }; static inline void packet_sock_flag_set(struct packet_sock *po, -- GitLab From 791a3e9f1a86fe8eb09173c9788493b8b5c957f4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Mar 2023 01:10:14 +0000 Subject: [PATCH 0558/2078] net/packet: convert po->pressure to an atomic flag Not only this removes some READ_ONCE()/WRITE_ONCE(), this also removes one integer. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/af_packet.c | 14 ++++++++------ net/packet/internal.h | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ec446452bbe8d..7b9367b233d34 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1307,22 +1307,23 @@ static int __packet_rcv_has_room(const struct packet_sock *po, static int packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb) { - int pressure, ret; + bool pressure; + int ret; ret = __packet_rcv_has_room(po, skb); pressure = ret != ROOM_NORMAL; - if (READ_ONCE(po->pressure) != pressure) - WRITE_ONCE(po->pressure, pressure); + if (packet_sock_flag(po, PACKET_SOCK_PRESSURE) != pressure) + packet_sock_flag_set(po, PACKET_SOCK_PRESSURE, pressure); return ret; } static void packet_rcv_try_clear_pressure(struct packet_sock *po) { - if (READ_ONCE(po->pressure) && + if (packet_sock_flag(po, PACKET_SOCK_PRESSURE) && __packet_rcv_has_room(po, NULL) == ROOM_NORMAL) - WRITE_ONCE(po->pressure, 0); + packet_sock_flag_set(po, PACKET_SOCK_PRESSURE, false); } static void packet_sock_destruct(struct sock *sk) @@ -1409,7 +1410,8 @@ static unsigned int fanout_demux_rollover(struct packet_fanout *f, i = j = min_t(int, po->rollover->sock, num - 1); do { po_next = pkt_sk(rcu_dereference(f->arr[i])); - if (po_next != po_skip && !READ_ONCE(po_next->pressure) && + if (po_next != po_skip && + !packet_sock_flag(po_next, PACKET_SOCK_PRESSURE) && packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) { if (i != j) po->rollover->sock = i; diff --git a/net/packet/internal.h b/net/packet/internal.h index 58f042c631723..680703dbce5e0 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -117,7 +117,6 @@ struct packet_sock { spinlock_t bind_lock; struct mutex pg_vec_lock; unsigned long flags; - int pressure; int ifindex; /* bound device */ __be16 num; struct packet_rollover *rollover; @@ -146,6 +145,7 @@ enum packet_sock_flags { PACKET_SOCK_TP_LOSS, PACKET_SOCK_HAS_VNET_HDR, PACKET_SOCK_RUNNING, + PACKET_SOCK_PRESSURE, }; static inline void packet_sock_flag_set(struct packet_sock *po, -- GitLab From 
74bf6477c18b2904936763132e9224a41b8da13a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 15 Mar 2023 21:49:13 -0700 Subject: [PATCH 0559/2078] netlink-specs: add partial specification for devlink Devlink is quite complex but put in the very basics so we can incrementally fill in the commands as needed. $ ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/devlink.yaml \ --dump get [{'bus-name': 'netdevsim', 'dev-name': 'netdevsim1', 'dev-stats': {'reload-stats': {'reload-action-info': {'reload-action': 1, 'reload-action-stats': {'reload-stats-entry': [{'reload-stats-limit': 0, 'reload-stats-value': 0}]}}}, 'remote-reload-stats': {'reload-action-info': {'reload-action': 2, 'reload-action-stats': {'reload-stats-entry': [{'reload-stats-limit': 0, 'reload-stats-value': 0}, {'reload-stats-limit': 1, 'reload-stats-value': 0}]}}}}, 'reload-failed': 0}] Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/netlink/specs/devlink.yaml | 198 +++++++++++++++++++++++ 1 file changed, 198 insertions(+) create mode 100644 Documentation/netlink/specs/devlink.yaml diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml new file mode 100644 index 0000000000000..90641668232e3 --- /dev/null +++ b/Documentation/netlink/specs/devlink.yaml @@ -0,0 +1,198 @@ +# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) + +name: devlink + +protocol: genetlink-legacy + +doc: Partial family for Devlink. + +attribute-sets: + - + name: devlink + attributes: + - + name: bus-name + type: string + value: 1 + - + name: dev-name + type: string + - + name: port-index + type: u32 + + # TODO: fill in the attributes in between + + - + name: info-driver-name + type: string + value: 98 + - + name: info-serial-number + type: string + - + name: info-version-fixed + type: nest + multi-attr: true + nested-attributes: dl-info-version + - + name: info-version-running + type: nest + multi-attr: true + nested-attributes: dl-info-version + - + name: info-version-stored + type: nest + multi-attr: true + nested-attributes: dl-info-version + - + name: info-version-name + type: string + - + name: info-version-value + type: string + + # TODO: fill in the attributes in between + + - + name: reload-failed + type: u8 + value: 136 + + # TODO: fill in the attributes in between + + - + name: reload-action + type: u8 + value: 153 + + # TODO: fill in the attributes in between + + - + name: dev-stats + type: nest + value: 156 + nested-attributes: dl-dev-stats + - + name: reload-stats + type: nest + nested-attributes: dl-reload-stats + - + name: reload-stats-entry + type: nest + multi-attr: true + nested-attributes: dl-reload-stats-entry + - + name: reload-stats-limit + type: u8 + - + name: reload-stats-value + type: u32 + - + name: remote-reload-stats + type: nest + nested-attributes: dl-reload-stats + - + name: reload-action-info + type: nest + nested-attributes: dl-reload-act-info + - + name: reload-action-stats + type: nest + nested-attributes: dl-reload-act-stats + - + name: dl-dev-stats + subset-of: devlink + attributes: + - + name: reload-stats + type: nest + - + name: remote-reload-stats + type: nest + - + name: dl-reload-stats + subset-of: devlink + attributes: + - + name: reload-action-info + type: nest + - + name: dl-reload-act-info + subset-of: devlink + attributes: + - + name: reload-action + type: u8 + - + name: reload-action-stats + type: nest + - + name: dl-reload-act-stats + subset-of: devlink + attributes: + - + name: reload-stats-entry + 
type: nest + - + name: dl-reload-stats-entry + subset-of: devlink + attributes: + - + name: reload-stats-limit + type: u8 + - + name: reload-stats-value + type: u32 + - + name: dl-info-version + subset-of: devlink + attributes: + - + name: info-version-name + type: string + - + name: info-version-value + type: string + +operations: + enum-model: directional + list: + - + name: get + doc: Get devlink instances. + attribute-set: devlink + + do: + request: + value: 1 + attributes: &dev-id-attrs + - bus-name + - dev-name + reply: &get-reply + value: 3 + attributes: + - bus-name + - dev-name + - reload-failed + - reload-action + - dev-stats + dump: + reply: *get-reply + + # TODO: fill in the operations in between + + - + name: info-get + doc: Get device information, like driver name, hardware and firmware versions etc. + attribute-set: devlink + + do: + request: + value: 51 + attributes: *dev-id-attrs + reply: + value: 51 + attributes: + - bus-name + - dev-name -- GitLab From 82b3297009b6831dfe47f0f38ed4043e39f58c9f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 15 Mar 2023 21:50:27 -0700 Subject: [PATCH 0560/2078] netlink: specs: allow uapi-header in genetlink Chuck wanted to put the UAPI header in linux/net/ which seems reasonable, allow genetlink families to choose the location. It doesn't really matter for non-C-like languages. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/netlink/genetlink-c.yaml | 2 +- Documentation/netlink/genetlink-legacy.yaml | 2 +- Documentation/netlink/genetlink.yaml | 3 +++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Documentation/netlink/genetlink-c.yaml b/Documentation/netlink/genetlink-c.yaml index f082a5ad7cf1d..c83643d403b76 100644 --- a/Documentation/netlink/genetlink-c.yaml +++ b/Documentation/netlink/genetlink-c.yaml @@ -33,10 +33,10 @@ properties: protocol: description: Schema compatibility level. Default is "genetlink". enum: [ genetlink, genetlink-c ] - # Start genetlink-c uapi-header: description: Path to the uAPI header, default is linux/${family-name}.h type: string + # Start genetlink-c c-family-name: description: Name of the define for the family name. type: string diff --git a/Documentation/netlink/genetlink-legacy.yaml b/Documentation/netlink/genetlink-legacy.yaml index c6b8c77f7d12e..792875dd7ed14 100644 --- a/Documentation/netlink/genetlink-legacy.yaml +++ b/Documentation/netlink/genetlink-legacy.yaml @@ -33,10 +33,10 @@ properties: protocol: description: Schema compatibility level. Default is "genetlink". enum: [ genetlink, genetlink-c, genetlink-legacy ] # Trim - # Start genetlink-c uapi-header: description: Path to the uAPI header, default is linux/${family-name}.h type: string + # Start genetlink-c c-family-name: description: Name of the define for the family name. type: string diff --git a/Documentation/netlink/genetlink.yaml b/Documentation/netlink/genetlink.yaml index b2d56ab9e615d..8952e84ff2076 100644 --- a/Documentation/netlink/genetlink.yaml +++ b/Documentation/netlink/genetlink.yaml @@ -33,6 +33,9 @@ properties: protocol: description: Schema compatibility level. Default is "genetlink". enum: [ genetlink ] + uapi-header: + description: Path to the uAPI header, default is linux/${family-name}.h + type: string definitions: description: List of type and constant definitions (enums, flags, defines). 
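
For readers who want the same devlink dump outside the ynl CLI shown above, a plain genetlink client can issue the equivalent request by hand. The sketch below is illustrative only and is not part of these patches: it assumes libnl-3, and the DEVLINK_GENL_NAME, DEVLINK_CMD_GET and DEVLINK_GENL_VERSION constants come from the existing uAPI header rather than from the new spec (the spec's "get" operation value 1 corresponds to DEVLINK_CMD_GET).

/* Illustrative only: dump devlink instances over genetlink with libnl-3,
 * mirroring the ynl CLI example above. Not part of these patches.
 */
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <linux/devlink.h>

int main(void)
{
	struct nl_sock *sk = nl_socket_alloc();
	struct nl_msg *msg;
	int family, err;

	if (!sk || genl_connect(sk) < 0)
		return 1;

	family = genl_ctrl_resolve(sk, DEVLINK_GENL_NAME); /* "devlink" */
	if (family < 0)
		return 1;

	msg = nlmsg_alloc();
	if (!msg)
		return 1;

	/* DEVLINK_CMD_GET matches the "get" do/dump request in the spec */
	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0,
		    NLM_F_REQUEST | NLM_F_DUMP, DEVLINK_CMD_GET,
		    DEVLINK_GENL_VERSION);

	err = nl_send_auto(sk, msg);
	if (err >= 0)
		err = nl_recvmsgs_default(sk); /* replies carry bus-name/dev-name */

	nlmsg_free(msg);
	nl_socket_free(sk);
	return err < 0;
}

A real client would install a receive callback to parse the bus-name/dev-name attributes instead of discarding the replies; the sketch builds with the flags from pkg-config --cflags --libs libnl-genl-3.0.
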
-- GitLab From abc17a11ed29b0471e428d86189acca8d1a213c6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Mar 2023 15:31:55 +0000 Subject: [PATCH 0561/2078] inet: preserve const qualifier in inet_sk() We can change inet_sk() to propagate const qualifier of its argument. This should avoid some potential errors caused by accidental (const -> not_const) promotion. Other helpers like tcp_sk(), udp_sk(), raw_sk() will be handled in separate patch series. v2: use container_of_const() as advised by Jakub and Linus Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/netdev/20230315142841.3a2ac99a@kernel.org/ Link: https://lore.kernel.org/netdev/CAHk-=wiOf12nrYEF2vJMcucKjWPN-Ns_SW9fA7LwST_2Dzp7rw@mail.gmail.com/ Signed-off-by: David S. Miller --- include/net/inet_sock.h | 5 +---- include/trace/events/sock.h | 4 ++-- include/trace/events/tcp.h | 2 +- net/ipv4/ip_output.c | 5 +++-- net/ipv6/ping.c | 2 +- net/ipv6/udp.c | 2 +- net/mptcp/sockopt.c | 2 +- security/lsm_audit.c | 4 ++-- 8 files changed, 12 insertions(+), 14 deletions(-) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 51857117ac099..caa20a9055310 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -305,10 +305,7 @@ static inline struct sock *skb_to_full_sk(const struct sk_buff *skb) return sk_to_full_sk(skb->sk); } -static inline struct inet_sock *inet_sk(const struct sock *sk) -{ - return (struct inet_sock *)sk; -} +#define inet_sk(ptr) container_of_const(ptr, struct inet_sock, sk) static inline void __inet_sk_copy_descendant(struct sock *sk_to, const struct sock *sk_from, diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h index 03d19fc562f87..fd206a6ab5b85 100644 --- a/include/trace/events/sock.h +++ b/include/trace/events/sock.h @@ -158,7 +158,7 @@ TRACE_EVENT(inet_sock_set_state, ), TP_fast_assign( - struct inet_sock *inet = inet_sk(sk); + const struct inet_sock *inet = inet_sk(sk); struct in6_addr *pin6; __be32 *p32; @@ -222,7 +222,7 @@ TRACE_EVENT(inet_sk_error_report, ), TP_fast_assign( - struct inet_sock *inet = inet_sk(sk); + const struct inet_sock *inet = inet_sk(sk); struct in6_addr *pin6; __be32 *p32; diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 901b440238d5f..bf06db8d2046c 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -67,7 +67,7 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb, ), TP_fast_assign( - struct inet_sock *inet = inet_sk(sk); + const struct inet_sock *inet = inet_sk(sk); __be32 *p32; __entry->skbaddr = skb; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e7bef36ce26f5..cb04dbad9ea47 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -129,7 +129,8 @@ int ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL_GPL(ip_local_out); -static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) +static inline int ip_select_ttl(const struct inet_sock *inet, + const struct dst_entry *dst) { int ttl = inet->uc_ttl; @@ -146,7 +147,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk, __be32 saddr, __be32 daddr, struct ip_options_rcu *opt, u8 tos) { - struct inet_sock *inet = inet_sk(sk); + const struct inet_sock *inet = inet_sk(sk); struct rtable *rt = skb_rtable(skb); struct net *net = sock_net(sk); struct iphdr *iph; diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 808983bc2ec9f..c4835dbdfcff7 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -237,7 +237,7 @@ static int ping_v6_seq_show(struct seq_file 
*seq, void *v) seq_puts(seq, IPV6_SEQ_DGRAM_HEADER); } else { int bucket = ((struct ping_iter_state *) seq->private)->bucket; - struct inet_sock *inet = inet_sk(v); + struct inet_sock *inet = inet_sk((struct sock *)v); __u16 srcp = ntohs(inet->inet_sport); __u16 destp = ntohs(inet->inet_dport); ip6_dgram_sock_seq_show(seq, v, srcp, destp, bucket); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 9fb2f33ee3a76..ab4ae886235ac 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1708,7 +1708,7 @@ int udp6_seq_show(struct seq_file *seq, void *v) seq_puts(seq, IPV6_SEQ_DGRAM_HEADER); } else { int bucket = ((struct udp_iter_state *)seq->private)->bucket; - struct inet_sock *inet = inet_sk(v); + const struct inet_sock *inet = inet_sk((const struct sock *)v); __u16 srcp = ntohs(inet->inet_sport); __u16 destp = ntohs(inet->inet_dport); __ip6_dgram_sock_seq_show(seq, v, srcp, destp, diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 8a9656248b0f0..5cef4d3d21ac8 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -1046,7 +1046,7 @@ static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval, static void mptcp_get_sub_addrs(const struct sock *sk, struct mptcp_subflow_addrs *a) { - struct inet_sock *inet = inet_sk(sk); + const struct inet_sock *inet = inet_sk(sk); memset(a, 0, sizeof(*a)); diff --git a/security/lsm_audit.c b/security/lsm_audit.c index a7355b4b9bb86..00d3bdd386e29 100644 --- a/security/lsm_audit.c +++ b/security/lsm_audit.c @@ -317,7 +317,7 @@ static void dump_common_audit_data(struct audit_buffer *ab, switch (sk->sk_family) { case AF_INET: { - struct inet_sock *inet = inet_sk(sk); + const struct inet_sock *inet = inet_sk(sk); print_ipv4_addr(ab, inet->inet_rcv_saddr, inet->inet_sport, @@ -329,7 +329,7 @@ static void dump_common_audit_data(struct audit_buffer *ab, } #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: { - struct inet_sock *inet = inet_sk(sk); + const struct inet_sock *inet = inet_sk(sk); print_ipv6_addr(ab, &sk->sk_v6_rcv_saddr, inet->inet_sport, -- GitLab From 33e972bdf0b0aa208b67164c64eef3c307e4b303 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Mar 2023 15:31:56 +0000 Subject: [PATCH 0562/2078] ipv4: constify ip_mc_sf_allow() socket argument This clarifies ip_mc_sf_allow() intent. Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- include/linux/igmp.h | 2 +- net/ipv4/igmp.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/igmp.h b/include/linux/igmp.h index b19d3284551f0..ebf4349a53af7 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -122,7 +122,7 @@ extern int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, sockptr_t optval, sockptr_t optlen); extern int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, sockptr_t optval, size_t offset); -extern int ip_mc_sf_allow(struct sock *sk, __be32 local, __be32 rmt, +extern int ip_mc_sf_allow(const struct sock *sk, __be32 local, __be32 rmt, int dif, int sdif); extern void ip_mc_init_dev(struct in_device *); extern void ip_mc_destroy_dev(struct in_device *); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index c920aa9a62a98..48ff5f13e7979 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2638,10 +2638,10 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, /* * check if a multicast source filter allows delivery for a given */ -int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, +int ip_mc_sf_allow(const struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif, int sdif) { - struct inet_sock *inet = inet_sk(sk); + const struct inet_sock *inet = inet_sk(sk); struct ip_mc_socklist *pmc; struct ip_sf_socklist *psl; int i; -- GitLab From a0a989d3007538cd7dae9431d8ac47850ced3100 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Mar 2023 15:31:57 +0000 Subject: [PATCH 0563/2078] udp: constify __udp_is_mcast_sock() socket argument This clarifies __udp_is_mcast_sock() intent. Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/ipv4/udp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index dc8feb54d835f..aa32afd871ee5 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -578,12 +578,12 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, EXPORT_SYMBOL_GPL(udp4_lib_lookup); #endif -static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk, +static inline bool __udp_is_mcast_sock(struct net *net, const struct sock *sk, __be16 loc_port, __be32 loc_addr, __be16 rmt_port, __be32 rmt_addr, int dif, int sdif, unsigned short hnum) { - struct inet_sock *inet = inet_sk(sk); + const struct inet_sock *inet = inet_sk(sk); if (!net_eq(sock_net(sk), net) || udp_sk(sk)->udp_port_hash != hnum || -- GitLab From 66eb554c6449cef8c1e1b814f74d13f264582591 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Mar 2023 15:31:58 +0000 Subject: [PATCH 0564/2078] ipv6: constify inet6_mc_check() inet6_mc_check() is essentially a read-only function. Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- include/net/addrconf.h | 2 +- net/ipv6/mcast.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index c04f359655b86..82da55101b5a3 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -223,7 +223,7 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr); void __ipv6_sock_mc_close(struct sock *sk); void ipv6_sock_mc_close(struct sock *sk); -bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, +bool inet6_mc_check(const struct sock *sk, const struct in6_addr *mc_addr, const struct in6_addr *src_addr); int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 1c02160cf7a4c..714cdc9e2b8ed 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -627,12 +627,12 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, return 0; } -bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, +bool inet6_mc_check(const struct sock *sk, const struct in6_addr *mc_addr, const struct in6_addr *src_addr) { - struct ipv6_pinfo *np = inet6_sk(sk); - struct ipv6_mc_socklist *mc; - struct ip6_sf_socklist *psl; + const struct ipv6_pinfo *np = inet6_sk(sk); + const struct ipv6_mc_socklist *mc; + const struct ip6_sf_socklist *psl; bool rv = true; rcu_read_lock(); -- GitLab From dc3731bad8e133ca1893ae414fdebdf85a333495 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Mar 2023 15:31:59 +0000 Subject: [PATCH 0565/2078] udp6: constify __udp_v6_is_mcast_sock() socket argument This clarifies __udp_v6_is_mcast_sock() intent. Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/ipv6/udp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index ab4ae886235ac..d350e57c47929 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -805,12 +805,12 @@ static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) return 0; } -static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk, +static bool __udp_v6_is_mcast_sock(struct net *net, const struct sock *sk, __be16 loc_port, const struct in6_addr *loc_addr, __be16 rmt_port, const struct in6_addr *rmt_addr, int dif, int sdif, unsigned short hnum) { - struct inet_sock *inet = inet_sk(sk); + const struct inet_sock *inet = inet_sk(sk); if (!net_eq(sock_net(sk), net)) return false; -- GitLab From db6af4fdb150b45e1ba6b295ccfd3df482e022d2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Mar 2023 15:32:00 +0000 Subject: [PATCH 0566/2078] ipv6: raw: constify raw_v6_match() socket argument This clarifies raw_v6_match() intent. Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- include/net/rawv6.h | 2 +- net/ipv6/raw.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/rawv6.h b/include/net/rawv6.h index bc70909625f60..82810cbe37984 100644 --- a/include/net/rawv6.h +++ b/include/net/rawv6.h @@ -6,7 +6,7 @@ #include extern struct raw_hashinfo raw_v6_hashinfo; -bool raw_v6_match(struct net *net, struct sock *sk, unsigned short num, +bool raw_v6_match(struct net *net, const struct sock *sk, unsigned short num, const struct in6_addr *loc_addr, const struct in6_addr *rmt_addr, int dif, int sdif); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index bac9ba747bdec..6ac2f2690c44c 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -64,7 +64,7 @@ struct raw_hashinfo raw_v6_hashinfo; EXPORT_SYMBOL_GPL(raw_v6_hashinfo); -bool raw_v6_match(struct net *net, struct sock *sk, unsigned short num, +bool raw_v6_match(struct net *net, const struct sock *sk, unsigned short num, const struct in6_addr *loc_addr, const struct in6_addr *rmt_addr, int dif, int sdif) { -- GitLab From 0a8c2568209ee0c3392593c7c5c7fe41c625a383 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Mar 2023 15:32:01 +0000 Subject: [PATCH 0567/2078] ipv4: raw: constify raw_v4_match() socket argument This clarifies raw_v4_match() intent. Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/net/raw.h | 2 +- net/ipv4/raw.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/net/raw.h b/include/net/raw.h index 2c004c20ed996..7ad15830cf384 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -22,7 +22,7 @@ extern struct proto raw_prot; extern struct raw_hashinfo raw_v4_hashinfo; -bool raw_v4_match(struct net *net, struct sock *sk, unsigned short num, +bool raw_v4_match(struct net *net, const struct sock *sk, unsigned short num, __be32 raddr, __be32 laddr, int dif, int sdif); int raw_abort(struct sock *sk, int err); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 94df935ee0c5a..3cf68695b40dd 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -116,10 +116,10 @@ void raw_unhash_sk(struct sock *sk) } EXPORT_SYMBOL_GPL(raw_unhash_sk); -bool raw_v4_match(struct net *net, struct sock *sk, unsigned short num, +bool raw_v4_match(struct net *net, const struct sock *sk, unsigned short num, __be32 raddr, __be32 laddr, int dif, int sdif) { - struct inet_sock *inet = inet_sk(sk); + const struct inet_sock *inet = inet_sk(sk); if (net_eq(sock_net(sk), net) && inet->inet_num == num && !(inet->inet_daddr && inet->inet_daddr != raddr) && -- GitLab From 736c8b52c8ada4ca93a59f7d597c5650ebc45921 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Mar 2023 15:32:02 +0000 Subject: [PATCH 0568/2078] inet_diag: constify raw_lookup() socket argument Now both raw_v4_match() and raw_v6_match() accept a const socket, raw_lookup() can do the same to clarify its role. Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/ipv4/raw_diag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c index 999321834b94a..bca49a844f010 100644 --- a/net/ipv4/raw_diag.c +++ b/net/ipv4/raw_diag.c @@ -34,7 +34,7 @@ raw_get_hashinfo(const struct inet_diag_req_v2 *r) * use helper to figure it out. 
*/ -static bool raw_lookup(struct net *net, struct sock *sk, +static bool raw_lookup(struct net *net, const struct sock *sk, const struct inet_diag_req_v2 *req) { struct inet_diag_req_raw *r = (void *)req; -- GitLab From 79a22238b4f22c45cadd3b4040d644f4de320d1b Mon Sep 17 00:00:00 2001 From: Kai Shen Date: Fri, 17 Mar 2023 03:21:32 +0000 Subject: [PATCH 0569/2078] net/smc: Use percpu ref for wr tx reference The refcount wr_tx_refcnt may cause cache thrashing problems among cores and we can use percpu ref to mitigate this issue here. We gain some performance improvement with percpu ref here on our customized smc-r verion. Applying cache alignment may also mitigate this problem but it seem more reasonable to use percpu ref here. We can also replace wr_reg_refcnt with one percpu reference like wr_tx_refcnt. redis-benchmark on smc-r with atomic wr_tx_refcnt: SET: 525707.06 requests per second, p50=0.087 msec GET: 554877.38 requests per second, p50=0.087 msec redis-benchmark on the percpu_ref version: SET: 540482.06 requests per second, p50=0.087 msec GET: 570711.12 requests per second, p50=0.079 msec Cases are like "redis-benchmark -h x.x.x.x -q -t set,get -P 1 -n 5000000 -c 50 -d 10 --threads 4". Signed-off-by: Kai Shen Reviewed-by: Tony Lu Signed-off-by: David S. Miller --- net/smc/smc_core.h | 10 ++++++++-- net/smc/smc_wr.c | 35 ++++++++++++++++++++++++++++------- net/smc/smc_wr.h | 5 ++--- 3 files changed, 38 insertions(+), 12 deletions(-) diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 08b457c2d2942..1645fba0d2d38 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -106,7 +106,10 @@ struct smc_link { unsigned long *wr_tx_mask; /* bit mask of used indexes */ u32 wr_tx_cnt; /* number of WR send buffers */ wait_queue_head_t wr_tx_wait; /* wait for free WR send buf */ - atomic_t wr_tx_refcnt; /* tx refs to link */ + struct { + struct percpu_ref wr_tx_refs; + } ____cacheline_aligned_in_smp; + struct completion tx_ref_comp; struct smc_wr_buf *wr_rx_bufs; /* WR recv payload buffers */ struct ib_recv_wr *wr_rx_ibs; /* WR recv meta data */ @@ -122,7 +125,10 @@ struct smc_link { struct ib_reg_wr wr_reg; /* WR register memory region */ wait_queue_head_t wr_reg_wait; /* wait for wr_reg result */ - atomic_t wr_reg_refcnt; /* reg refs to link */ + struct { + struct percpu_ref wr_reg_refs; + } ____cacheline_aligned_in_smp; + struct completion reg_ref_comp; enum smc_wr_reg_state wr_reg_state; /* state of wr_reg request */ u8 gid[SMC_GID_SIZE];/* gid matching used vlan id*/ diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index b0678a417e09d..0021065a600a0 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -377,12 +377,11 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr) if (rc) return rc; - atomic_inc(&link->wr_reg_refcnt); + percpu_ref_get(&link->wr_reg_refs); rc = wait_event_interruptible_timeout(link->wr_reg_wait, (link->wr_reg_state != POSTED), SMC_WR_REG_MR_WAIT_TIME); - if (atomic_dec_and_test(&link->wr_reg_refcnt)) - wake_up_all(&link->wr_reg_wait); + percpu_ref_put(&link->wr_reg_refs); if (!rc) { /* timeout - terminate link */ smcr_link_down_cond_sched(link); @@ -647,8 +646,10 @@ void smc_wr_free_link(struct smc_link *lnk) smc_wr_wakeup_tx_wait(lnk); smc_wr_tx_wait_no_pending_sends(lnk); - wait_event(lnk->wr_reg_wait, (!atomic_read(&lnk->wr_reg_refcnt))); - wait_event(lnk->wr_tx_wait, (!atomic_read(&lnk->wr_tx_refcnt))); + percpu_ref_kill(&lnk->wr_reg_refs); + wait_for_completion(&lnk->reg_ref_comp); + percpu_ref_kill(&lnk->wr_tx_refs); + 
wait_for_completion(&lnk->tx_ref_comp); if (lnk->wr_rx_dma_addr) { ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr, @@ -847,6 +848,20 @@ void smc_wr_add_dev(struct smc_ib_device *smcibdev) tasklet_setup(&smcibdev->send_tasklet, smc_wr_tx_tasklet_fn); } +static void smcr_wr_tx_refs_free(struct percpu_ref *ref) +{ + struct smc_link *lnk = container_of(ref, struct smc_link, wr_tx_refs); + + complete(&lnk->tx_ref_comp); +} + +static void smcr_wr_reg_refs_free(struct percpu_ref *ref) +{ + struct smc_link *lnk = container_of(ref, struct smc_link, wr_reg_refs); + + complete(&lnk->reg_ref_comp); +} + int smc_wr_create_link(struct smc_link *lnk) { struct ib_device *ibdev = lnk->smcibdev->ibdev; @@ -890,9 +905,15 @@ int smc_wr_create_link(struct smc_link *lnk) smc_wr_init_sge(lnk); bitmap_zero(lnk->wr_tx_mask, SMC_WR_BUF_CNT); init_waitqueue_head(&lnk->wr_tx_wait); - atomic_set(&lnk->wr_tx_refcnt, 0); + rc = percpu_ref_init(&lnk->wr_tx_refs, smcr_wr_tx_refs_free, 0, GFP_KERNEL); + if (rc) + goto dma_unmap; + init_completion(&lnk->tx_ref_comp); init_waitqueue_head(&lnk->wr_reg_wait); - atomic_set(&lnk->wr_reg_refcnt, 0); + rc = percpu_ref_init(&lnk->wr_reg_refs, smcr_wr_reg_refs_free, 0, GFP_KERNEL); + if (rc) + goto dma_unmap; + init_completion(&lnk->reg_ref_comp); init_waitqueue_head(&lnk->wr_rx_empty_wait); return rc; diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index 45e9b894d3f8a..f3008dda222a6 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -63,14 +63,13 @@ static inline bool smc_wr_tx_link_hold(struct smc_link *link) { if (!smc_link_sendable(link)) return false; - atomic_inc(&link->wr_tx_refcnt); + percpu_ref_get(&link->wr_tx_refs); return true; } static inline void smc_wr_tx_link_put(struct smc_link *link) { - if (atomic_dec_and_test(&link->wr_tx_refcnt)) - wake_up_all(&link->wr_tx_wait); + percpu_ref_put(&link->wr_tx_refs); } static inline void smc_wr_drain_cq(struct smc_link *lnk) -- GitLab From bd5314f8dd2d41330eecb60f0490c3fcfe1fc99d Mon Sep 17 00:00:00 2001 From: Viktor Malik Date: Fri, 17 Mar 2023 10:56:01 +0100 Subject: [PATCH 0570/2078] kallsyms, bpf: Move find_kallsyms_symbol_value out of internal header Moving find_kallsyms_symbol_value from kernel/module/internal.h to include/linux/module.h. The reason is that internal.h is not prepared to be included when CONFIG_MODULES=n. find_kallsyms_symbol_value is used by kernel/bpf/verifier.c and including internal.h from it (without modules) leads into a compilation error: In file included from ../include/linux/container_of.h:5, from ../include/linux/list.h:5, from ../include/linux/timer.h:5, from ../include/linux/workqueue.h:9, from ../include/linux/bpf.h:10, from ../include/linux/bpf-cgroup.h:5, from ../kernel/bpf/verifier.c:7: ../kernel/bpf/../module/internal.h: In function 'mod_find': ../include/linux/container_of.h:20:54: error: invalid use of undefined type 'struct module' 20 | static_assert(__same_type(*(ptr), ((type *)0)->member) || \ | ^~ [...] This patch fixes the above error. 
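
The fix follows the usual declare-or-stub idiom for helpers that live in module code: a real declaration when CONFIG_MODULES is set and a static inline stub returning a safe default otherwise, so callers such as the BPF verifier build in both configurations. Below is a compressed sketch of that idiom with a placeholder helper name; the actual declarations added by this patch are in the diff that follows.

/* Illustrative sketch of the declare-or-stub idiom; "my_module_helper"
 * is a hypothetical name, not the function touched by this patch.
 */
struct module;

#ifdef CONFIG_MODULES
unsigned long my_module_helper(struct module *mod, const char *name);
#else
static inline unsigned long my_module_helper(struct module *mod,
					     const char *name)
{
	return 0;	/* no modules built in, so nothing to look up */
}
#endif
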
Fixes: 31bf1dbccfb0 ("bpf: Fix attaching fentry/fexit/fmod_ret/lsm to modules") Reported-by: kernel test robot Signed-off-by: Viktor Malik Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/oe-kbuild-all/202303161404.OrmfCy09-lkp@intel.com/ Link: https://lore.kernel.org/bpf/20230317095601.386738-1-vmalik@redhat.com --- include/linux/module.h | 8 ++++++++ kernel/bpf/verifier.c | 2 +- kernel/module/internal.h | 6 ------ 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/include/linux/module.h b/include/linux/module.h index 4435ad9439abb..41cfd3be57e58 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -616,6 +616,8 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, /* Look for this name: can be of form module:name. */ unsigned long module_kallsyms_lookup_name(const char *name); +unsigned long find_kallsyms_symbol_value(struct module *mod, const char *name); + extern void __noreturn __module_put_and_kthread_exit(struct module *mod, long code); #define module_put_and_kthread_exit(code) __module_put_and_kthread_exit(THIS_MODULE, code) @@ -796,6 +798,12 @@ static inline unsigned long module_kallsyms_lookup_name(const char *name) return 0; } +static inline unsigned long find_kallsyms_symbol_value(struct module *mod, + const char *name) +{ + return 0; +} + static inline int register_module_notifier(struct notifier_block *nb) { /* no events will happen anyway, so this can always succeed */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d62b7127ff2a2..99394a2f7ee49 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -24,7 +24,7 @@ #include #include #include -#include "../module/internal.h" +#include #include "disasm.h" diff --git a/kernel/module/internal.h b/kernel/module/internal.h index 5c9170f9135ce..1c877561a7d2b 100644 --- a/kernel/module/internal.h +++ b/kernel/module/internal.h @@ -246,7 +246,6 @@ static inline void kmemleak_load_module(const struct module *mod, void init_build_id(struct module *mod, const struct load_info *info); void layout_symtab(struct module *mod, struct load_info *info); void add_kallsyms(struct module *mod, const struct load_info *info); -unsigned long find_kallsyms_symbol_value(struct module *mod, const char *name); static inline bool sect_empty(const Elf_Shdr *sect) { @@ -256,11 +255,6 @@ static inline bool sect_empty(const Elf_Shdr *sect) static inline void init_build_id(struct module *mod, const struct load_info *info) { } static inline void layout_symtab(struct module *mod, struct load_info *info) { } static inline void add_kallsyms(struct module *mod, const struct load_info *info) { } -static inline unsigned long find_kallsyms_symbol_value(struct module *mod, - const char *name) -{ - return 0; -} #endif /* CONFIG_KALLSYMS */ #ifdef CONFIG_SYSFS -- GitLab From 0f10f647f45545004ea50b73a7a7c5c3309ff286 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Tue, 14 Mar 2023 14:44:49 +0700 Subject: [PATCH 0571/2078] bpf, docs: Use internal linking for link to netdev subsystem doc Commit d56b0c461d19da ("bpf, docs: Fix link to netdev-FAQ target") attempts to fix linking problem to undefined "netdev-FAQ" label introduced in 287f4fa99a5281 ("docs: Update references to netdev-FAQ") by changing internal cross reference to netdev subsystem documentation (Documentation/process/maintainer-netdev.rst) to external one at docs.kernel.org. 
However, the linking problem is still not resolved, as the generated link points to non-existent netdev-FAQ section of the external doc, which when clicked, will instead going to the top of the doc. Revert back to internal linking by simply mention the doc path while massaging the leading text to the link, since the netdev subsystem doc contains no FAQs but rather general information about the subsystem. Fixes: d56b0c461d19 ("bpf, docs: Fix link to netdev-FAQ target") Fixes: 287f4fa99a52 ("docs: Update references to netdev-FAQ") Signed-off-by: Bagas Sanjaya Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230314074449.23620-1-bagasdotme@gmail.com --- Documentation/bpf/bpf_devel_QA.rst | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst index 5f5f9ccc3862b..e151e61dff381 100644 --- a/Documentation/bpf/bpf_devel_QA.rst +++ b/Documentation/bpf/bpf_devel_QA.rst @@ -128,7 +128,8 @@ into the bpf-next tree will make their way into net-next tree. net and net-next are both run by David S. Miller. From there, they will go into the kernel mainline tree run by Linus Torvalds. To read up on the process of net and net-next being merged into the mainline tree, see -the `netdev-FAQ`_. +the documentation on netdev subsystem at +Documentation/process/maintainer-netdev.rst. @@ -147,7 +148,8 @@ request):: Q: How do I indicate which tree (bpf vs. bpf-next) my patch should be applied to? --------------------------------------------------------------------------------- -A: The process is the very same as described in the `netdev-FAQ`_, +A: The process is the very same as described in the netdev subsystem +documentation at Documentation/process/maintainer-netdev.rst, so please read up on it. The subject line must indicate whether the patch is a fix or rather "next-like" content in order to let the maintainers know whether it is targeted at bpf or bpf-next. @@ -206,8 +208,9 @@ ii) run extensive BPF test suite and Once the BPF pull request was accepted by David S. Miller, then the patches end up in net or net-next tree, respectively, and make their way from there further into mainline. Again, see the -`netdev-FAQ`_ for additional information e.g. on how often they are -merged to mainline. +documentation for netdev subsystem at +Documentation/process/maintainer-netdev.rst for additional information +e.g. on how often they are merged to mainline. Q: How long do I need to wait for feedback on my BPF patches? ------------------------------------------------------------- @@ -230,7 +233,8 @@ Q: Are patches applied to bpf-next when the merge window is open? ----------------------------------------------------------------- A: For the time when the merge window is open, bpf-next will not be processed. This is roughly analogous to net-next patch processing, -so feel free to read up on the `netdev-FAQ`_ about further details. +so feel free to read up on the netdev docs at +Documentation/process/maintainer-netdev.rst about further details. During those two weeks of merge window, we might ask you to resend your patch series once bpf-next is open again. Once Linus released @@ -394,7 +398,8 @@ netdev kernel mailing list in Cc and ask for the fix to be queued up: netdev@vger.kernel.org The process in general is the same as on netdev itself, see also the -`netdev-FAQ`_. +the documentation on networking subsystem at +Documentation/process/maintainer-netdev.rst. 
Q: Do you also backport to kernels not currently maintained as stable? ---------------------------------------------------------------------- @@ -410,7 +415,7 @@ Q: The BPF patch I am about to submit needs to go to stable as well What should I do? A: The same rules apply as with netdev patch submissions in general, see -the `netdev-FAQ`_. +the netdev docs at Documentation/process/maintainer-netdev.rst. Never add "``Cc: stable@vger.kernel.org``" to the patch description, but ask the BPF maintainers to queue the patches instead. This can be done @@ -685,7 +690,6 @@ when: .. Links .. _Documentation/process/: https://www.kernel.org/doc/html/latest/process/ -.. _netdev-FAQ: https://www.kernel.org/doc/html/latest/process/maintainer-netdev.html .. _selftests: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/ .. _Documentation/dev-tools/kselftest.rst: -- GitLab From 58aa2afbb1e61fcf35bfcc819952a3c13d9f9203 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 17 Mar 2023 13:19:17 -0700 Subject: [PATCH 0572/2078] bpf: Allow ld_imm64 instruction to point to kfunc. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow ld_imm64 insn with BPF_PSEUDO_BTF_ID to hold the address of kfunc. The ld_imm64 pointing to a valid kfunc will be seen as non-null PTR_TO_MEM by is_branch_taken() logic of the verifier, while libbpf will resolve address to unknown kfunc as ld_imm64 reg, 0 which will also be recognized by is_branch_taken() and the verifier will proceed dead code elimination. BPF programs can use this logic to detect at load time whether kfunc is present in the kernel with bpf_ksym_exists() macro that is introduced in the next patches. Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Reviewed-by: Martin KaFai Lau Reviewed-by: Toke Høiland-Jørgensen Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20230317201920.62030-2-alexei.starovoitov@gmail.com --- kernel/bpf/verifier.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 99394a2f7ee49..8bc44f5dc5b6e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -15952,8 +15952,8 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env, goto err_put; } - if (!btf_type_is_var(t)) { - verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n", id); + if (!btf_type_is_var(t) && !btf_type_is_func(t)) { + verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR or KIND_FUNC\n", id); err = -EINVAL; goto err_put; } @@ -15966,6 +15966,14 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env, err = -ENOENT; goto err_put; } + insn[0].imm = (u32)addr; + insn[1].imm = addr >> 32; + + if (btf_type_is_func(t)) { + aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY; + aux->btf_var.mem_size = 0; + goto check_btf; + } datasec_id = find_btf_percpu_datasec(btf); if (datasec_id > 0) { @@ -15978,9 +15986,6 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env, } } - insn[0].imm = (u32)addr; - insn[1].imm = addr >> 32; - type = t->type; t = btf_type_skip_modifiers(btf, type, NULL); if (percpu) { @@ -16008,7 +16013,7 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env, aux->btf_var.btf = btf; aux->btf_var.btf_id = type; } - +check_btf: /* check whether we recorded this BTF (and maybe module) already */ for (i = 0; i < env->used_btf_cnt; i++) { if (env->used_btfs[i].btf == btf) { -- GitLab From 
5fc13ad59b60708e52932188854d4d5bf2b0e10e Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 17 Mar 2023 13:19:18 -0700 Subject: [PATCH 0573/2078] libbpf: Fix relocation of kfunc ksym in ld_imm64 insn. void *p = kfunc; -> generates ld_imm64 insn. kfunc() -> generates bpf_call insn. libbpf patches bpf_call insn correctly while only btf_id part of ld_imm64 is set in the former case. Which means that pointers to kfuncs in modules are not patched correctly and the verifier rejects load of such programs due to btf_id being out of range. Fix libbpf to patch ld_imm64 for kfunc. Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230317201920.62030-3-alexei.starovoitov@gmail.com --- tools/lib/bpf/libbpf.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index a557718401e4c..4c34fbd7b5bea 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -7533,6 +7533,12 @@ static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj, ext->is_set = true; ext->ksym.kernel_btf_id = kfunc_id; ext->ksym.btf_fd_idx = mod_btf ? mod_btf->fd_array_idx : 0; + /* Also set kernel_btf_obj_fd to make sure that bpf_object__relocate_data() + * populates FD into ld_imm64 insn when it's used to point to kfunc. + * {kernel_btf_id, btf_fd_idx} -> fixup bpf_call. + * {kernel_btf_id, kernel_btf_obj_fd} -> fixup ld_imm64. + */ + ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0; pr_debug("extern (func ksym) '%s': resolved to kernel [%d]\n", ext->name, kfunc_id); -- GitLab From 5cbd3fe3a91df46ea201cc5d8ab4e390332ec26e Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 17 Mar 2023 13:19:19 -0700 Subject: [PATCH 0574/2078] libbpf: Introduce bpf_ksym_exists() macro. Introduce bpf_ksym_exists() macro that can be used by BPF programs to detect at load time whether particular ksym (either variable or kfunc) is present in the kernel. Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230317201920.62030-4-alexei.starovoitov@gmail.com --- tools/lib/bpf/bpf_helpers.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 7d12d3e620cc8..e7e1a8acc2997 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -177,6 +177,11 @@ enum libbpf_tristate { #define __kptr_untrusted __attribute__((btf_type_tag("kptr_untrusted"))) #define __kptr __attribute__((btf_type_tag("kptr"))) +#define bpf_ksym_exists(sym) ({ \ + _Static_assert(!__builtin_constant_p(!!sym), #sym " should be marked as __weak"); \ + !!sym; \ +}) + #ifndef ___bpf_concat #define ___bpf_concat(a, b) a ## b #endif -- GitLab From 95fdf6e313a981b0729886f86916190cb418b04c Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 17 Mar 2023 13:19:20 -0700 Subject: [PATCH 0575/2078] selftests/bpf: Add test for bpf_ksym_exists(). MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add load and run time test for bpf_ksym_exists() and check that the verifier performs dead code elimination for non-existing kfunc. 
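In essence, the pattern the test exercises looks like the condensed sketch below (illustrative only: the section and program names mirror the selftest, but the real test in task_kfunc_success.c carries extra error bookkeeping):

```
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

/* __weak is what lets libbpf emit "ld_imm64 reg, 0" when the kfunc is absent */
struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym __weak;
void bpf_task_release(struct task_struct *p) __ksym __weak;

SEC("tp_btf/task_newtask")
int BPF_PROG(probe_kfuncs, struct task_struct *task, u64 clone_flags)
{
	struct task_struct *acquired;

	if (!bpf_ksym_exists(bpf_task_acquire) || !bpf_ksym_exists(bpf_task_release))
		return 0;	/* kfuncs missing: the verifier removes everything below */

	acquired = bpf_task_acquire(task);
	if (acquired)
		bpf_task_release(acquired);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";
```
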
Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Reviewed-by: Martin KaFai Lau Reviewed-by: Toke Høiland-Jørgensen Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20230317201920.62030-5-alexei.starovoitov@gmail.com --- .../selftests/bpf/progs/task_kfunc_success.c | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_success.c b/tools/testing/selftests/bpf/progs/task_kfunc_success.c index 4f61596b0242f..cfa7f12b84e89 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_success.c +++ b/tools/testing/selftests/bpf/progs/task_kfunc_success.c @@ -17,6 +17,10 @@ int err, pid; * TP_PROTO(struct task_struct *p, u64 clone_flags) */ +struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym __weak; +void invalid_kfunc(void) __ksym __weak; +void bpf_testmod_test_mod_kfunc(int i) __ksym __weak; + static bool is_test_kfunc_task(void) { int cur_pid = bpf_get_current_pid_tgid() >> 32; @@ -26,7 +30,21 @@ static bool is_test_kfunc_task(void) static int test_acquire_release(struct task_struct *task) { - struct task_struct *acquired; + struct task_struct *acquired = NULL; + + if (!bpf_ksym_exists(bpf_task_acquire)) { + err = 3; + return 0; + } + if (!bpf_ksym_exists(bpf_testmod_test_mod_kfunc)) { + err = 4; + return 0; + } + if (bpf_ksym_exists(invalid_kfunc)) { + /* the verifier's dead code elimination should remove this */ + err = 5; + asm volatile ("goto -1"); /* for (;;); */ + } acquired = bpf_task_acquire(task); bpf_task_release(acquired); -- GitLab From 2be7aa76cc69633930fb747e1d85d33a63a60c02 Mon Sep 17 00:00:00 2001 From: Manu Bretelle Date: Fri, 17 Mar 2023 09:32:56 -0700 Subject: [PATCH 0576/2078] selftests/bpf: Add --json-summary option to test_progs Currently, test_progs outputs all stdout/stderr as it runs, and when it is done, prints a summary. It is non-trivial for tooling to parse that output and extract meaningful information from it. This change adds a new option, `--json-summary`/`-J` that let the caller specify a file where `test_progs{,-no_alu32}` can write a summary of the run in a json format that can later be parsed by tooling. Currently, it creates a summary section with successes/skipped/failures followed by a list of failed tests and subtests. A test contains the following fields: - name: the name of the test - number: the number of the test - message: the log message that was printed by the test. - failed: A boolean indicating whether the test failed or not. Currently we only output failed tests, but in the future, successful tests could be added. - subtests: A list of subtests associated with this test. A subtest contains the following fields: - name: same as above - number: sanme as above - message: the log message that was printed by the subtest. 
- failed: same as above but for the subtest An example run and json content below: ``` $ sudo ./test_progs -a $(grep -v '^#' ./DENYLIST.aarch64 | awk '{print $1","}' | tr -d '\n') -j -J /tmp/test_progs.json $ jq < /tmp/test_progs.json | head -n 30 { "success": 29, "success_subtest": 23, "skipped": 3, "failed": 28, "results": [ { "name": "bpf_cookie", "number": 10, "message": "test_bpf_cookie:PASS:skel_open 0 nsec\n", "failed": true, "subtests": [ { "name": "multi_kprobe_link_api", "number": 2, "message": "kprobe_multi_link_api_subtest:PASS:load_kallsyms 0 nsec\nlibbpf: extern 'bpf_testmod_fentry_test1' (strong): not resolved\nlibbpf: failed to load object 'kprobe_multi'\nlibbpf: failed to load BPF skeleton 'kprobe_multi': -3\nkprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3\n", "failed": true }, { "name": "multi_kprobe_attach_api", "number": 3, "message": "libbpf: extern 'bpf_testmod_fentry_test1' (strong): not resolved\nlibbpf: failed to load object 'kprobe_multi'\nlibbpf: failed to load BPF skeleton 'kprobe_multi': -3\nkprobe_multi_attach_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3\n", "failed": true }, { "name": "lsm", "number": 8, "message": "lsm_subtest:PASS:lsm.link_create 0 nsec\nlsm_subtest:FAIL:stack_mprotect unexpected stack_mprotect: actual 0 != expected -1\n", "failed": true } ``` The file can then be used to print a summary of the test run and list of failing tests/subtests: ``` $ jq -r < /tmp/test_progs.json '"Success: \(.success)/\(.success_subtest), Skipped: \(.skipped), Failed: \(.failed)"' Success: 29/23, Skipped: 3, Failed: 28 $ jq -r < /tmp/test_progs.json '.results | map([ if .failed then "#\(.number) \(.name)" else empty end, ( . as {name: $tname, number: $tnum} | .subtests | map( if .failed then "#\($tnum)/\(.number) \($tname)/\(.name)" else empty end ) ) ]) | flatten | .[]' | head -n 20 #10 bpf_cookie #10/2 bpf_cookie/multi_kprobe_link_api #10/3 bpf_cookie/multi_kprobe_attach_api #10/8 bpf_cookie/lsm #15 bpf_mod_race #15/1 bpf_mod_race/ksym (used_btfs UAF) #15/2 bpf_mod_race/kfunc (kfunc_btf_tab UAF) #36 cgroup_hierarchical_stats #61 deny_namespace #61/1 deny_namespace/unpriv_userns_create_no_bpf #73 fexit_stress #83 get_func_ip_test #99 kfunc_dynptr_param #99/1 kfunc_dynptr_param/dynptr_data_null #99/4 kfunc_dynptr_param/dynptr_data_null #100 kprobe_multi_bench_attach #100/1 kprobe_multi_bench_attach/kernel #100/2 kprobe_multi_bench_attach/modules #101 kprobe_multi_test #101/1 kprobe_multi_test/skel_api ``` Signed-off-by: Manu Bretelle Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230317163256.3809328-1-chantr4@gmail.com --- tools/testing/selftests/bpf/Makefile | 4 +- tools/testing/selftests/bpf/json_writer.c | 1 + tools/testing/selftests/bpf/json_writer.h | 1 + tools/testing/selftests/bpf/test_progs.c | 83 +++++++++++++++++++++-- tools/testing/selftests/bpf/test_progs.h | 1 + 5 files changed, 84 insertions(+), 6 deletions(-) create mode 120000 tools/testing/selftests/bpf/json_writer.c create mode 120000 tools/testing/selftests/bpf/json_writer.h diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 55811c448eb78..fc092582d16d8 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -234,6 +234,7 @@ $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(BPFOBJ) CGROUP_HELPERS := $(OUTPUT)/cgroup_helpers.o TESTING_HELPERS := $(OUTPUT)/testing_helpers.o TRACE_HELPERS := $(OUTPUT)/trace_helpers.o +JSON_WRITER := 
$(OUTPUT)/json_writer.o CAP_HELPERS := $(OUTPUT)/cap_helpers.o $(OUTPUT)/test_dev_cgroup: $(CGROUP_HELPERS) $(TESTING_HELPERS) @@ -559,7 +560,8 @@ TRUNNER_BPF_PROGS_DIR := progs TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \ network_helpers.c testing_helpers.c \ btf_helpers.c flow_dissector_load.h \ - cap_helpers.c test_loader.c xsk.c disasm.c + cap_helpers.c test_loader.c xsk.c disasm.c \ + json_writer.c TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ $(OUTPUT)/liburandom_read.so \ $(OUTPUT)/xdp_synproxy \ diff --git a/tools/testing/selftests/bpf/json_writer.c b/tools/testing/selftests/bpf/json_writer.c new file mode 120000 index 0000000000000..5effa31e2f39b --- /dev/null +++ b/tools/testing/selftests/bpf/json_writer.c @@ -0,0 +1 @@ +../../../bpf/bpftool/json_writer.c \ No newline at end of file diff --git a/tools/testing/selftests/bpf/json_writer.h b/tools/testing/selftests/bpf/json_writer.h new file mode 120000 index 0000000000000..e0a264c267521 --- /dev/null +++ b/tools/testing/selftests/bpf/json_writer.h @@ -0,0 +1 @@ +../../../bpf/bpftool/json_writer.h \ No newline at end of file diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 6d5e3022c75f4..d903e6a72a967 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -18,6 +18,7 @@ #include #include #include +#include "json_writer.h" static bool verbose(void) { @@ -269,10 +270,23 @@ static void print_subtest_name(int test_num, int subtest_num, fprintf(env.stdout, "\n"); } +static void jsonw_write_log_message(json_writer_t *w, char *log_buf, size_t log_cnt) +{ + /* open_memstream (from stdio_hijack_init) ensures that log_bug is terminated by a + * null byte. Yet in parallel mode, log_buf will be NULL if there is no message. 
+ */ + if (log_cnt) { + jsonw_string_field(w, "message", log_buf); + } else { + jsonw_string_field(w, "message", ""); + } +} + static void dump_test_log(const struct prog_test_def *test, const struct test_state *test_state, bool skip_ok_subtests, - bool par_exec_result) + bool par_exec_result, + json_writer_t *w) { bool test_failed = test_state->error_cnt > 0; bool force_log = test_state->force_log; @@ -296,6 +310,16 @@ static void dump_test_log(const struct prog_test_def *test, if (test_state->log_cnt && print_test) print_test_log(test_state->log_buf, test_state->log_cnt); + if (w && print_test) { + jsonw_start_object(w); + jsonw_string_field(w, "name", test->test_name); + jsonw_uint_field(w, "number", test->test_num); + jsonw_write_log_message(w, test_state->log_buf, test_state->log_cnt); + jsonw_bool_field(w, "failed", test_failed); + jsonw_name(w, "subtests"); + jsonw_start_array(w); + } + for (i = 0; i < test_state->subtest_num; i++) { subtest_state = &test_state->subtest_states[i]; subtest_failed = subtest_state->error_cnt; @@ -314,6 +338,20 @@ static void dump_test_log(const struct prog_test_def *test, test->test_name, subtest_state->name, test_result(subtest_state->error_cnt, subtest_state->skipped)); + + if (w && print_subtest) { + jsonw_start_object(w); + jsonw_string_field(w, "name", subtest_state->name); + jsonw_uint_field(w, "number", i+1); + jsonw_write_log_message(w, subtest_state->log_buf, subtest_state->log_cnt); + jsonw_bool_field(w, "failed", subtest_failed); + jsonw_end_object(w); + } + } + + if (w && print_test) { + jsonw_end_array(w); + jsonw_end_object(w); } print_test_result(test, test_state); @@ -715,6 +753,7 @@ enum ARG_KEYS { ARG_TEST_NAME_GLOB_DENYLIST = 'd', ARG_NUM_WORKERS = 'j', ARG_DEBUG = -1, + ARG_JSON_SUMMARY = 'J' }; static const struct argp_option opts[] = { @@ -740,6 +779,7 @@ static const struct argp_option opts[] = { "Number of workers to run in parallel, default to number of cpus." }, { "debug", ARG_DEBUG, NULL, 0, "print extra debug information for test_progs." 
}, + { "json-summary", ARG_JSON_SUMMARY, "FILE", 0, "Write report in json format to this file."}, {}, }; @@ -870,6 +910,13 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) case ARG_DEBUG: env->debug = true; break; + case ARG_JSON_SUMMARY: + env->json = fopen(arg, "w"); + if (env->json == NULL) { + perror("Failed to open json summary file"); + return -errno; + } + break; case ARGP_KEY_ARG: argp_usage(state); break; @@ -1017,7 +1064,7 @@ void crash_handler(int signum) stdio_restore(); if (env.test) { env.test_state->error_cnt++; - dump_test_log(env.test, env.test_state, true, false); + dump_test_log(env.test, env.test_state, true, false, NULL); } if (env.worker_id != -1) fprintf(stderr, "[%d]: ", env.worker_id); @@ -1124,7 +1171,7 @@ static void run_one_test(int test_num) stdio_restore(); - dump_test_log(test, state, false, false); + dump_test_log(test, state, false, false, NULL); } struct dispatch_data { @@ -1283,7 +1330,7 @@ static void *dispatch_thread(void *ctx) } while (false); pthread_mutex_lock(&stdout_output_lock); - dump_test_log(test, state, false, true); + dump_test_log(test, state, false, true, NULL); pthread_mutex_unlock(&stdout_output_lock); } /* while (true) */ error: @@ -1308,6 +1355,7 @@ static void calculate_summary_and_print_errors(struct test_env *env) { int i; int succ_cnt = 0, fail_cnt = 0, sub_succ_cnt = 0, skip_cnt = 0; + json_writer_t *w = NULL; for (i = 0; i < prog_test_cnt; i++) { struct test_state *state = &test_states[i]; @@ -1324,6 +1372,22 @@ static void calculate_summary_and_print_errors(struct test_env *env) succ_cnt++; } + if (env->json) { + w = jsonw_new(env->json); + if (!w) + fprintf(env->stderr, "Failed to create new JSON stream."); + } + + if (w) { + jsonw_start_object(w); + jsonw_uint_field(w, "success", succ_cnt); + jsonw_uint_field(w, "success_subtest", sub_succ_cnt); + jsonw_uint_field(w, "skipped", skip_cnt); + jsonw_uint_field(w, "failed", fail_cnt); + jsonw_name(w, "results"); + jsonw_start_array(w); + } + /* * We only print error logs summary when there are failed tests and * verbose mode is not enabled. Otherwise, results may be incosistent. @@ -1340,10 +1404,19 @@ static void calculate_summary_and_print_errors(struct test_env *env) if (!state->tested || !state->error_cnt) continue; - dump_test_log(test, state, true, true); + dump_test_log(test, state, true, true, w); } } + if (w) { + jsonw_end_array(w); + jsonw_end_object(w); + jsonw_destroy(&w); + } + + if (env->json) + fclose(env->json); + printf("Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", succ_cnt, sub_succ_cnt, skip_cnt, fail_cnt); diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 3cbf005747ed7..4b06b8347cd4f 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -114,6 +114,7 @@ struct test_env { FILE *stdout; FILE *stderr; int nr_cpus; + FILE *json; int succ_cnt; /* successful tests */ int sub_succ_cnt; /* successful sub-tests */ -- GitLab From 6365ba64b4dbe8b59ddaeaa724b281f3787715d5 Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Wed, 8 Mar 2023 15:05:31 +0000 Subject: [PATCH 0577/2078] ptp: kvm: Use decrypted memory in confidential guest on x86 KVM_HC_CLOCK_PAIRING currently fails inside SEV-SNP guests because the guest passes an address to static data to the host. In confidential computing the host can't access arbitrary guest memory so handling the hypercall runs into an "rmpfault". 
To make the hypercall work, the guest needs to explicitly mark the memory as decrypted. Do that in kvm_arch_ptp_init(), but retain the previous behavior for non-confidential guests to save us from having to allocate memory. Add a new arch-specific function (kvm_arch_ptp_exit()) to free the allocation and mark the memory as encrypted again. Signed-off-by: Jeremi Piotrowski Link: https://lore.kernel.org/r/20230308150531.477741-1-jpiotrowski@linux.microsoft.com Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_kvm_arm.c | 4 +++ drivers/ptp/ptp_kvm_common.c | 1 + drivers/ptp/ptp_kvm_x86.c | 59 +++++++++++++++++++++++++++++------- include/linux/ptp_kvm.h | 1 + 4 files changed, 54 insertions(+), 11 deletions(-) diff --git a/drivers/ptp/ptp_kvm_arm.c b/drivers/ptp/ptp_kvm_arm.c index b7d28c8dfb84e..e68e6943167be 100644 --- a/drivers/ptp/ptp_kvm_arm.c +++ b/drivers/ptp/ptp_kvm_arm.c @@ -22,6 +22,10 @@ int kvm_arch_ptp_init(void) return 0; } +void kvm_arch_ptp_exit(void) +{ +} + int kvm_arch_ptp_get_clock(struct timespec64 *ts) { return kvm_arch_ptp_get_crosststamp(NULL, ts, NULL); diff --git a/drivers/ptp/ptp_kvm_common.c b/drivers/ptp/ptp_kvm_common.c index 9141162c42379..2418977989beb 100644 --- a/drivers/ptp/ptp_kvm_common.c +++ b/drivers/ptp/ptp_kvm_common.c @@ -130,6 +130,7 @@ static struct kvm_ptp_clock kvm_ptp_clock; static void __exit ptp_kvm_exit(void) { ptp_clock_unregister(kvm_ptp_clock.ptp_clock); + kvm_arch_ptp_exit(); } static int __init ptp_kvm_init(void) diff --git a/drivers/ptp/ptp_kvm_x86.c b/drivers/ptp/ptp_kvm_x86.c index 4991054a21350..902844cc1a17d 100644 --- a/drivers/ptp/ptp_kvm_x86.c +++ b/drivers/ptp/ptp_kvm_x86.c @@ -14,27 +14,64 @@ #include #include #include +#include static phys_addr_t clock_pair_gpa; -static struct kvm_clock_pairing clock_pair; +static struct kvm_clock_pairing clock_pair_glbl; +static struct kvm_clock_pairing *clock_pair; int kvm_arch_ptp_init(void) { + struct page *p; long ret; if (!kvm_para_available()) return -ENODEV; - clock_pair_gpa = slow_virt_to_phys(&clock_pair); - if (!pvclock_get_pvti_cpu0_va()) - return -ENODEV; + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) { + p = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!p) + return -ENOMEM; + + clock_pair = page_address(p); + ret = set_memory_decrypted((unsigned long)clock_pair, 1); + if (ret) { + __free_page(p); + clock_pair = NULL; + goto nofree; + } + } else { + clock_pair = &clock_pair_glbl; + } + + clock_pair_gpa = slow_virt_to_phys(clock_pair); + if (!pvclock_get_pvti_cpu0_va()) { + ret = -ENODEV; + goto err; + } ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING, clock_pair_gpa, KVM_CLOCK_PAIRING_WALLCLOCK); - if (ret == -KVM_ENOSYS) - return -ENODEV; + if (ret == -KVM_ENOSYS) { + ret = -ENODEV; + goto err; + } return ret; + +err: + kvm_arch_ptp_exit(); +nofree: + return ret; +} + +void kvm_arch_ptp_exit(void) +{ + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) { + WARN_ON(set_memory_encrypted((unsigned long)clock_pair, 1)); + free_page((unsigned long)clock_pair); + clock_pair = NULL; + } } int kvm_arch_ptp_get_clock(struct timespec64 *ts) @@ -49,8 +86,8 @@ int kvm_arch_ptp_get_clock(struct timespec64 *ts) return -EOPNOTSUPP; } - ts->tv_sec = clock_pair.sec; - ts->tv_nsec = clock_pair.nsec; + ts->tv_sec = clock_pair->sec; + ts->tv_nsec = clock_pair->nsec; return 0; } @@ -81,9 +118,9 @@ int kvm_arch_ptp_get_crosststamp(u64 *cycle, struct timespec64 *tspec, pr_err_ratelimited("clock pairing hypercall ret %lu\n", ret); return -EOPNOTSUPP; } - tspec->tv_sec = clock_pair.sec; - tspec->tv_nsec = 
clock_pair.nsec; - *cycle = __pvclock_read_cycles(src, clock_pair.tsc); + tspec->tv_sec = clock_pair->sec; + tspec->tv_nsec = clock_pair->nsec; + *cycle = __pvclock_read_cycles(src, clock_pair->tsc); } while (pvclock_read_retry(src, version)); *cs = &kvm_clock; diff --git a/include/linux/ptp_kvm.h b/include/linux/ptp_kvm.h index c2e28deef33a8..746fd67c34806 100644 --- a/include/linux/ptp_kvm.h +++ b/include/linux/ptp_kvm.h @@ -14,6 +14,7 @@ struct timespec64; struct clocksource; int kvm_arch_ptp_init(void); +void kvm_arch_ptp_exit(void); int kvm_arch_ptp_get_clock(struct timespec64 *ts); int kvm_arch_ptp_get_crosststamp(u64 *cycle, struct timespec64 *tspec, struct clocksource **cs); -- GitLab From 4dd2744fae6dc705c5d2298fe0f0812d34f5b64e Mon Sep 17 00:00:00 2001 From: mengyuanlou Date: Wed, 15 Mar 2023 17:18:46 +0800 Subject: [PATCH 0578/2078] net: wangxun: Remove macro that is redefined Remove PCI_VENDOR_ID_WANGXUN which is redefined in drivers/pci/quirks. Signed-off-by: mengyuanlou Link: https://lore.kernel.org/r/20230315091846.17314-1-mengyuanlou@net-swift.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/libwx/wx_type.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index 2b9efd13c500d..2bec5b1bc196d 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -7,11 +7,6 @@ #include #include -/* Vendor ID */ -#ifndef PCI_VENDOR_ID_WANGXUN -#define PCI_VENDOR_ID_WANGXUN 0x8088 -#endif - #define WX_NCSI_SUP 0x8000 #define WX_NCSI_MASK 0x8000 #define WX_WOL_SUP 0x4000 -- GitLab From b1a2de9ccfe63b50e493ebb57b85beb5785200c7 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 15 Mar 2023 17:38:43 +0100 Subject: [PATCH 0579/2078] net: dsa: mv88e6xxx: don't dispose of Global2 IRQ mappings from mdiobus code irq_find_mapping() does not need irq_dispose_mapping(), only irq_create_mapping() does. Calling irq_dispose_mapping() from mv88e6xxx_g2_irq_mdio_free() and from the error path of mv88e6xxx_g2_irq_mdio_setup() effectively means that the mdiobus logic (for internal PHY interrupts) is disposing of a hwirq->virq mapping which it is not responsible of (but instead, the function pair mv88e6xxx_g2_irq_setup() + mv88e6xxx_g2_irq_free() is). With the current code structure, this isn't such a huge problem, because mv88e6xxx_g2_irq_mdio_free() is called relatively close to the real owner of the IRQ mappings: mv88e6xxx_remove() -> mv88e6xxx_unregister_switch() -> mv88e6xxx_mdios_unregister() -> mv88e6xxx_g2_irq_mdio_free() -> mv88e6xxx_g2_irq_free() and the switch isn't 'live' in any way such that it would be able of generating interrupts at this point (mv88e6xxx_unregister_switch() has been called). However, there is a desire to split mv88e6xxx_mdios_unregister() and mv88e6xxx_g2_irq_free() such that mv88e6xxx_mdios_unregister() only gets called from mv88e6xxx_teardown(). This is much more problematic, as can be seen below. In a cross-chip scenario (say 3 switches d0032004.mdio-mii:10, d0032004.mdio-mii:11 and d0032004.mdio-mii:12 which form a single DSA tree), it is possible to unbind the device driver from a single switch (say d0032004.mdio-mii:10). When that happens, mv88e6xxx_remove() will be called for just that one switch, and this will call mv88e6xxx_unregister_switch() which will tear down the entire tree (calling mv88e6xxx_teardown() for all 3 switches). 
Assuming mv88e6xxx_mdios_unregister() was moved to mv88e6xxx_teardown(), at this stage, all 3 switches will have called irq_dispose_mapping() on their mdiobus virqs. When we bind again the device driver to d0032004.mdio-mii:10, mv88e6xxx_probe() is called for it, which calls dsa_register_switch(). The DSA tree is now complete again, and mv88e6xxx_setup() is called for all 3 switches. Also assuming that mv88e6xxx_mdios_register() is moved to mv88e6xxx_setup() (the 2 assumptions go together), at this point, d0032004.mdio-mii:11 and d0032004.mdio-mii:12 don't have an IRQ mapping for the internal PHYs anymore, as they've disposed of it in mv88e6xxx_teardown(). Whereas switch d0032004.mdio-mii:10 has re-created it, because its code path comes from mv88e6xxx_probe(). Simply put, this change prepares the driver to handle the movement of mv88e6xxx_mdios_register() to mv88e6xxx_setup() for cross-chip DSA trees. Also, the code being deleted was partially wrong anyway (in a way which may have hidden this other issue). mv88e6xxx_g2_irq_mdio_setup() populates bus->irq[] starting with offset chip->info->phy_base_addr, but the teardown path doesn't apply that offset too. So it disposes of virq 0 for phy = [ 0, phy_base_addr ). All switch families have phy_base_addr = 0, except for MV88E6141 and MV88E6341 which have it as 0x10. I guess those families would have happened to work by mistake in cross-chip scenarios too. I'm deleting the body of mv88e6xxx_g2_irq_mdio_free() but leaving its call sites and prototype in place. This is because, if we ever need to add back some teardown procedure in the future, it will be perhaps error-prone to deduce the proper call sites again. Whereas like this, no extra code should get generated, it shouldn't bother anybody. Signed-off-by: Vladimir Oltean Signed-off-by: Klaus Kudielka Tested-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/global2.c | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c index ed3b2f88e7835..a26546d3d7b57 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.c +++ b/drivers/net/dsa/mv88e6xxx/global2.c @@ -1176,31 +1176,19 @@ out: int mv88e6xxx_g2_irq_mdio_setup(struct mv88e6xxx_chip *chip, struct mii_bus *bus) { - int phy, irq, err, err_phy; + int phy, irq; for (phy = 0; phy < chip->info->num_internal_phys; phy++) { irq = irq_find_mapping(chip->g2_irq.domain, phy); - if (irq < 0) { - err = irq; - goto out; - } + if (irq < 0) + return irq; + bus->irq[chip->info->phy_base_addr + phy] = irq; } return 0; -out: - err_phy = phy; - - for (phy = 0; phy < err_phy; phy++) - irq_dispose_mapping(bus->irq[phy]); - - return err; } void mv88e6xxx_g2_irq_mdio_free(struct mv88e6xxx_chip *chip, struct mii_bus *bus) { - int phy; - - for (phy = 0; phy < chip->info->num_internal_phys; phy++) - irq_dispose_mapping(bus->irq[phy]); } -- GitLab From f1bee740fa821c28582bb17c42e2242041e598b8 Mon Sep 17 00:00:00 2001 From: Klaus Kudielka Date: Wed, 15 Mar 2023 17:38:44 +0100 Subject: [PATCH 0580/2078] net: dsa: mv88e6xxx: re-order functions Move mv88e6xxx_setup() below mv88e6xxx_mdios_register(), so that we are able to call the latter one from here. Do the same thing for the inverse functions. 
Signed-off-by: Klaus Kudielka Reviewed-by: Andrew Lunn Reviewed-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/chip.c | 358 +++++++++++++++---------------- 1 file changed, 179 insertions(+), 179 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 30383c4f8fd0e..184c116dfc8fc 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3680,185 +3680,6 @@ static int mv88e6390_setup_errata(struct mv88e6xxx_chip *chip) return mv88e6xxx_software_reset(chip); } -static void mv88e6xxx_teardown(struct dsa_switch *ds) -{ - mv88e6xxx_teardown_devlink_params(ds); - dsa_devlink_resources_unregister(ds); - mv88e6xxx_teardown_devlink_regions_global(ds); -} - -static int mv88e6xxx_setup(struct dsa_switch *ds) -{ - struct mv88e6xxx_chip *chip = ds->priv; - u8 cmode; - int err; - int i; - - chip->ds = ds; - ds->slave_mii_bus = mv88e6xxx_default_mdio_bus(chip); - - /* Since virtual bridges are mapped in the PVT, the number we support - * depends on the physical switch topology. We need to let DSA figure - * that out and therefore we cannot set this at dsa_register_switch() - * time. - */ - if (mv88e6xxx_has_pvt(chip)) - ds->max_num_bridges = MV88E6XXX_MAX_PVT_SWITCHES - - ds->dst->last_switch - 1; - - mv88e6xxx_reg_lock(chip); - - if (chip->info->ops->setup_errata) { - err = chip->info->ops->setup_errata(chip); - if (err) - goto unlock; - } - - /* Cache the cmode of each port. */ - for (i = 0; i < mv88e6xxx_num_ports(chip); i++) { - if (chip->info->ops->port_get_cmode) { - err = chip->info->ops->port_get_cmode(chip, i, &cmode); - if (err) - goto unlock; - - chip->ports[i].cmode = cmode; - } - } - - err = mv88e6xxx_vtu_setup(chip); - if (err) - goto unlock; - - /* Must be called after mv88e6xxx_vtu_setup (which flushes the - * VTU, thereby also flushing the STU). - */ - err = mv88e6xxx_stu_setup(chip); - if (err) - goto unlock; - - /* Setup Switch Port Registers */ - for (i = 0; i < mv88e6xxx_num_ports(chip); i++) { - if (dsa_is_unused_port(ds, i)) - continue; - - /* Prevent the use of an invalid port. 
*/ - if (mv88e6xxx_is_invalid_port(chip, i)) { - dev_err(chip->dev, "port %d is invalid\n", i); - err = -EINVAL; - goto unlock; - } - - err = mv88e6xxx_setup_port(chip, i); - if (err) - goto unlock; - } - - err = mv88e6xxx_irl_setup(chip); - if (err) - goto unlock; - - err = mv88e6xxx_mac_setup(chip); - if (err) - goto unlock; - - err = mv88e6xxx_phy_setup(chip); - if (err) - goto unlock; - - err = mv88e6xxx_pvt_setup(chip); - if (err) - goto unlock; - - err = mv88e6xxx_atu_setup(chip); - if (err) - goto unlock; - - err = mv88e6xxx_broadcast_setup(chip, 0); - if (err) - goto unlock; - - err = mv88e6xxx_pot_setup(chip); - if (err) - goto unlock; - - err = mv88e6xxx_rmu_setup(chip); - if (err) - goto unlock; - - err = mv88e6xxx_rsvd2cpu_setup(chip); - if (err) - goto unlock; - - err = mv88e6xxx_trunk_setup(chip); - if (err) - goto unlock; - - err = mv88e6xxx_devmap_setup(chip); - if (err) - goto unlock; - - err = mv88e6xxx_pri_setup(chip); - if (err) - goto unlock; - - /* Setup PTP Hardware Clock and timestamping */ - if (chip->info->ptp_support) { - err = mv88e6xxx_ptp_setup(chip); - if (err) - goto unlock; - - err = mv88e6xxx_hwtstamp_setup(chip); - if (err) - goto unlock; - } - - err = mv88e6xxx_stats_setup(chip); - if (err) - goto unlock; - -unlock: - mv88e6xxx_reg_unlock(chip); - - if (err) - return err; - - /* Have to be called without holding the register lock, since - * they take the devlink lock, and we later take the locks in - * the reverse order when getting/setting parameters or - * resource occupancy. - */ - err = mv88e6xxx_setup_devlink_resources(ds); - if (err) - return err; - - err = mv88e6xxx_setup_devlink_params(ds); - if (err) - goto out_resources; - - err = mv88e6xxx_setup_devlink_regions_global(ds); - if (err) - goto out_params; - - return 0; - -out_params: - mv88e6xxx_teardown_devlink_params(ds); -out_resources: - dsa_devlink_resources_unregister(ds); - - return err; -} - -static int mv88e6xxx_port_setup(struct dsa_switch *ds, int port) -{ - return mv88e6xxx_setup_devlink_regions_port(ds, port); -} - -static void mv88e6xxx_port_teardown(struct dsa_switch *ds, int port) -{ - mv88e6xxx_teardown_devlink_regions_port(ds, port); -} - /* prod_id for switch families which do not have a PHY model number */ static const u16 family_prod_id_table[] = { [MV88E6XXX_FAMILY_6341] = MV88E6XXX_PORT_SWITCH_ID_PROD_6341, @@ -4062,6 +3883,185 @@ static int mv88e6xxx_mdios_register(struct mv88e6xxx_chip *chip, return 0; } +static void mv88e6xxx_teardown(struct dsa_switch *ds) +{ + mv88e6xxx_teardown_devlink_params(ds); + dsa_devlink_resources_unregister(ds); + mv88e6xxx_teardown_devlink_regions_global(ds); +} + +static int mv88e6xxx_setup(struct dsa_switch *ds) +{ + struct mv88e6xxx_chip *chip = ds->priv; + u8 cmode; + int err; + int i; + + chip->ds = ds; + ds->slave_mii_bus = mv88e6xxx_default_mdio_bus(chip); + + /* Since virtual bridges are mapped in the PVT, the number we support + * depends on the physical switch topology. We need to let DSA figure + * that out and therefore we cannot set this at dsa_register_switch() + * time. + */ + if (mv88e6xxx_has_pvt(chip)) + ds->max_num_bridges = MV88E6XXX_MAX_PVT_SWITCHES - + ds->dst->last_switch - 1; + + mv88e6xxx_reg_lock(chip); + + if (chip->info->ops->setup_errata) { + err = chip->info->ops->setup_errata(chip); + if (err) + goto unlock; + } + + /* Cache the cmode of each port. 
*/ + for (i = 0; i < mv88e6xxx_num_ports(chip); i++) { + if (chip->info->ops->port_get_cmode) { + err = chip->info->ops->port_get_cmode(chip, i, &cmode); + if (err) + goto unlock; + + chip->ports[i].cmode = cmode; + } + } + + err = mv88e6xxx_vtu_setup(chip); + if (err) + goto unlock; + + /* Must be called after mv88e6xxx_vtu_setup (which flushes the + * VTU, thereby also flushing the STU). + */ + err = mv88e6xxx_stu_setup(chip); + if (err) + goto unlock; + + /* Setup Switch Port Registers */ + for (i = 0; i < mv88e6xxx_num_ports(chip); i++) { + if (dsa_is_unused_port(ds, i)) + continue; + + /* Prevent the use of an invalid port. */ + if (mv88e6xxx_is_invalid_port(chip, i)) { + dev_err(chip->dev, "port %d is invalid\n", i); + err = -EINVAL; + goto unlock; + } + + err = mv88e6xxx_setup_port(chip, i); + if (err) + goto unlock; + } + + err = mv88e6xxx_irl_setup(chip); + if (err) + goto unlock; + + err = mv88e6xxx_mac_setup(chip); + if (err) + goto unlock; + + err = mv88e6xxx_phy_setup(chip); + if (err) + goto unlock; + + err = mv88e6xxx_pvt_setup(chip); + if (err) + goto unlock; + + err = mv88e6xxx_atu_setup(chip); + if (err) + goto unlock; + + err = mv88e6xxx_broadcast_setup(chip, 0); + if (err) + goto unlock; + + err = mv88e6xxx_pot_setup(chip); + if (err) + goto unlock; + + err = mv88e6xxx_rmu_setup(chip); + if (err) + goto unlock; + + err = mv88e6xxx_rsvd2cpu_setup(chip); + if (err) + goto unlock; + + err = mv88e6xxx_trunk_setup(chip); + if (err) + goto unlock; + + err = mv88e6xxx_devmap_setup(chip); + if (err) + goto unlock; + + err = mv88e6xxx_pri_setup(chip); + if (err) + goto unlock; + + /* Setup PTP Hardware Clock and timestamping */ + if (chip->info->ptp_support) { + err = mv88e6xxx_ptp_setup(chip); + if (err) + goto unlock; + + err = mv88e6xxx_hwtstamp_setup(chip); + if (err) + goto unlock; + } + + err = mv88e6xxx_stats_setup(chip); + if (err) + goto unlock; + +unlock: + mv88e6xxx_reg_unlock(chip); + + if (err) + return err; + + /* Have to be called without holding the register lock, since + * they take the devlink lock, and we later take the locks in + * the reverse order when getting/setting parameters or + * resource occupancy. + */ + err = mv88e6xxx_setup_devlink_resources(ds); + if (err) + return err; + + err = mv88e6xxx_setup_devlink_params(ds); + if (err) + goto out_resources; + + err = mv88e6xxx_setup_devlink_regions_global(ds); + if (err) + goto out_params; + + return 0; + +out_params: + mv88e6xxx_teardown_devlink_params(ds); +out_resources: + dsa_devlink_resources_unregister(ds); + + return err; +} + +static int mv88e6xxx_port_setup(struct dsa_switch *ds, int port) +{ + return mv88e6xxx_setup_devlink_regions_port(ds, port); +} + +static void mv88e6xxx_port_teardown(struct dsa_switch *ds, int port) +{ + mv88e6xxx_teardown_devlink_regions_port(ds, port); +} + static int mv88e6xxx_get_eeprom_len(struct dsa_switch *ds) { struct mv88e6xxx_chip *chip = ds->priv; -- GitLab From 2cb0658d4f88608a19b3833789ac0ac8a4214f0b Mon Sep 17 00:00:00 2001 From: Klaus Kudielka Date: Wed, 15 Mar 2023 17:38:45 +0100 Subject: [PATCH 0581/2078] net: dsa: mv88e6xxx: move call to mv88e6xxx_mdios_register() Call the rather expensive mv88e6xxx_mdios_register() at the beginning of mv88e6xxx_setup(). This avoids the double call via mv88e6xxx_probe() during boot. For symmetry, call mv88e6xxx_mdios_unregister() at the end of mv88e6xxx_teardown(). 
Link: https://lore.kernel.org/lkml/449bde236c08d5ab5e54abd73b645d8b29955894.camel@gmail.com/ Suggested-by: Andrew Lunn Signed-off-by: Klaus Kudielka Reviewed-by: Andrew Lunn Reviewed-by: Vladimir Oltean Tested-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/chip.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 184c116dfc8fc..de25f755725ee 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3848,9 +3848,9 @@ static void mv88e6xxx_mdios_unregister(struct mv88e6xxx_chip *chip) } } -static int mv88e6xxx_mdios_register(struct mv88e6xxx_chip *chip, - struct device_node *np) +static int mv88e6xxx_mdios_register(struct mv88e6xxx_chip *chip) { + struct device_node *np = chip->dev->of_node; struct device_node *child; int err; @@ -3885,9 +3885,12 @@ static int mv88e6xxx_mdios_register(struct mv88e6xxx_chip *chip, static void mv88e6xxx_teardown(struct dsa_switch *ds) { + struct mv88e6xxx_chip *chip = ds->priv; + mv88e6xxx_teardown_devlink_params(ds); dsa_devlink_resources_unregister(ds); mv88e6xxx_teardown_devlink_regions_global(ds); + mv88e6xxx_mdios_unregister(chip); } static int mv88e6xxx_setup(struct dsa_switch *ds) @@ -3897,6 +3900,10 @@ static int mv88e6xxx_setup(struct dsa_switch *ds) int err; int i; + err = mv88e6xxx_mdios_register(chip); + if (err) + return err; + chip->ds = ds; ds->slave_mii_bus = mv88e6xxx_default_mdio_bus(chip); @@ -4023,7 +4030,7 @@ unlock: mv88e6xxx_reg_unlock(chip); if (err) - return err; + goto out_mdios; /* Have to be called without holding the register lock, since * they take the devlink lock, and we later take the locks in @@ -4032,7 +4039,7 @@ unlock: */ err = mv88e6xxx_setup_devlink_resources(ds); if (err) - return err; + goto out_mdios; err = mv88e6xxx_setup_devlink_params(ds); if (err) @@ -4048,6 +4055,8 @@ out_params: mv88e6xxx_teardown_devlink_params(ds); out_resources: dsa_devlink_resources_unregister(ds); +out_mdios: + mv88e6xxx_mdios_unregister(chip); return err; } @@ -7228,18 +7237,12 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev) if (err) goto out_g1_atu_prob_irq; - err = mv88e6xxx_mdios_register(chip, np); - if (err) - goto out_g1_vtu_prob_irq; - err = mv88e6xxx_register_switch(chip); if (err) - goto out_mdio; + goto out_g1_vtu_prob_irq; return 0; -out_mdio: - mv88e6xxx_mdios_unregister(chip); out_g1_vtu_prob_irq: mv88e6xxx_g1_vtu_prob_irq_free(chip); out_g1_atu_prob_irq: @@ -7276,7 +7279,6 @@ static void mv88e6xxx_remove(struct mdio_device *mdiodev) mv88e6xxx_phy_destroy(chip); mv88e6xxx_unregister_switch(chip); - mv88e6xxx_mdios_unregister(chip); mv88e6xxx_g1_vtu_prob_irq_free(chip); mv88e6xxx_g1_atu_prob_irq_free(chip); -- GitLab From 2c7e46edbd0308c12b8232ff4270d0894cbdce44 Mon Sep 17 00:00:00 2001 From: Klaus Kudielka Date: Wed, 15 Mar 2023 17:38:46 +0100 Subject: [PATCH 0582/2078] net: dsa: mv88e6xxx: mask apparently non-existing phys during probing To avoid excessive mdio bus transactions during probing, mask all phy addresses that do not exist (there is a 1:1 mapping between switch port number and phy address). 
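To make the effect concrete (the port count below is picked purely for illustration), a 7-port switch now registers its bus with:

```
/* one bit per MDIO address; a set bit tells mdiobus not to probe it */
bus->phy_mask = GENMASK(31, 7);	/* 0xffffff80: only addresses 0..6 are scanned */
```

so __mdiobus_register() no longer issues read transactions for the 25 addresses that cannot correspond to an internal PHY.
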
Suggested-by: Andrew Lunn Signed-off-by: Klaus Kudielka Reviewed-by: Andrew Lunn Reviewed-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/chip.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index de25f755725ee..260e38c5c6e66 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3805,6 +3805,7 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip, bus->read_c45 = mv88e6xxx_mdio_read_c45; bus->write_c45 = mv88e6xxx_mdio_write_c45; bus->parent = chip->dev; + bus->phy_mask = GENMASK(31, mv88e6xxx_num_ports(chip)); if (!external) { err = mv88e6xxx_g2_irq_mdio_setup(chip, bus); -- GitLab From 0de10fd6eb94259a749d558ee0d34083ae010a1d Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 15 Mar 2023 14:43:05 -0500 Subject: [PATCH 0583/2078] dt-bindings: net: qcom,ipa: add SDX65 compatible Add support for SDX65, which uses IPA v5.0. Reviewed-by: Simon Horman Signed-off-by: Alex Elder Link: https://lore.kernel.org/r/20230315194305.1647311-1-elder@linaro.org Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/qcom,ipa.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/net/qcom,ipa.yaml b/Documentation/devicetree/bindings/net/qcom,ipa.yaml index 4aeda379726fa..2d5e4ffb2f9ef 100644 --- a/Documentation/devicetree/bindings/net/qcom,ipa.yaml +++ b/Documentation/devicetree/bindings/net/qcom,ipa.yaml @@ -49,6 +49,7 @@ properties: - qcom,sc7280-ipa - qcom,sdm845-ipa - qcom,sdx55-ipa + - qcom,sdx65-ipa - qcom,sm6350-ipa - qcom,sm8350-ipa -- GitLab From ed0578a46c5f08647f5ecb0e3c735a53267426f7 Mon Sep 17 00:00:00 2001 From: Harini Katakam Date: Thu, 16 Mar 2023 14:00:50 +0530 Subject: [PATCH 0584/2078] net: macb: Increase halt timeout to accommodate 10Mbps link Increase halt timeout to accommodate for 16K SRAM at 10Mbps rounded. Signed-off-by: Harini Katakam Signed-off-by: Michal Simek Signed-off-by: Radhey Shyam Pandey Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20230316083050.2108-1-harini.katakam@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 26f41243743b4..c4edd20c1c668 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -94,8 +94,7 @@ struct sifive_fu540_macb_mgmt { /* Graceful stop timeouts in us. We should allow up to * 1 frame time (10 Mbits/s, full-duplex, ignoring collisions) */ -#define MACB_HALT_TIMEOUT 1230 - +#define MACB_HALT_TIMEOUT 14000 #define MACB_PM_TIMEOUT 100 /* ms */ #define MACB_MDIO_TIMEOUT 1000000 /* in usecs */ -- GitLab From 36bd28c1cb0dbf48645cfe43159907fb3253b33a Mon Sep 17 00:00:00 2001 From: haozhe chang Date: Thu, 16 Mar 2023 17:58:20 +0800 Subject: [PATCH 0585/2078] wwan: core: Support slicing in port TX flow of WWAN subsystem wwan_port_fops_write inputs the SKB parameter to the TX callback of the WWAN device driver. However, the WWAN device (e.g., t7xx) may have an MTU less than the size of SKB, causing the TX buffer to be sliced and copied once more in the WWAN device driver. This patch implements the slicing in the WWAN subsystem and gives the WWAN devices driver the option to slice(by frag_len) or not. By doing so, the additional memory copy is reduced. 
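A port driver opts in by describing its limit when the port is created, along these lines (the driver names and the 2 KiB value are made up for illustration):

```
/* hypothetical WWAN control-port driver: let the core slice writes */
struct wwan_port_caps caps = {
	.frag_len = 2048,	/* device accepts at most 2 KiB per fragment */
};

port = wwan_create_port(dev, WWAN_PORT_AT, &my_port_ops, &caps, drvdata);
```

Passing a NULL caps pointer keeps the previous behaviour (no slicing).
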
Meanwhile, this patch gives WWAN devices driver the option to reserve headroom in fragments for the device-specific metadata. Signed-off-by: haozhe chang Reviewed-by: Loic Poulain Link: https://lore.kernel.org/r/20230316095826.181904-1-haozhe.chang@mediatek.com Signed-off-by: Jakub Kicinski --- drivers/net/wwan/iosm/iosm_ipc_port.c | 3 +- drivers/net/wwan/mhi_wwan_ctrl.c | 2 +- drivers/net/wwan/rpmsg_wwan_ctrl.c | 2 +- drivers/net/wwan/t7xx/t7xx_port_wwan.c | 36 ++++++++-------- drivers/net/wwan/wwan_core.c | 58 +++++++++++++++++++------- drivers/net/wwan/wwan_hwsim.c | 2 +- drivers/usb/class/cdc-wdm.c | 3 +- include/linux/wwan.h | 11 +++++ 8 files changed, 81 insertions(+), 36 deletions(-) diff --git a/drivers/net/wwan/iosm/iosm_ipc_port.c b/drivers/net/wwan/iosm/iosm_ipc_port.c index b6d81c6272771..5d5b4183e14a3 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_port.c +++ b/drivers/net/wwan/iosm/iosm_ipc_port.c @@ -63,7 +63,8 @@ struct iosm_cdev *ipc_port_init(struct iosm_imem *ipc_imem, ipc_port->ipc_imem = ipc_imem; ipc_port->iosm_port = wwan_create_port(ipc_port->dev, port_type, - &ipc_wwan_ctrl_ops, ipc_port); + &ipc_wwan_ctrl_ops, NULL, + ipc_port); return ipc_port; } diff --git a/drivers/net/wwan/mhi_wwan_ctrl.c b/drivers/net/wwan/mhi_wwan_ctrl.c index f7ca52353f401..e9f979d2d851f 100644 --- a/drivers/net/wwan/mhi_wwan_ctrl.c +++ b/drivers/net/wwan/mhi_wwan_ctrl.c @@ -237,7 +237,7 @@ static int mhi_wwan_ctrl_probe(struct mhi_device *mhi_dev, /* Register as a wwan port, id->driver_data contains wwan port type */ port = wwan_create_port(&cntrl->mhi_dev->dev, id->driver_data, - &wwan_pops, mhiwwan); + &wwan_pops, NULL, mhiwwan); if (IS_ERR(port)) { kfree(mhiwwan); return PTR_ERR(port); diff --git a/drivers/net/wwan/rpmsg_wwan_ctrl.c b/drivers/net/wwan/rpmsg_wwan_ctrl.c index 31c24420ab2e4..06f4b02f15527 100644 --- a/drivers/net/wwan/rpmsg_wwan_ctrl.c +++ b/drivers/net/wwan/rpmsg_wwan_ctrl.c @@ -129,7 +129,7 @@ static int rpmsg_wwan_ctrl_probe(struct rpmsg_device *rpdev) /* Register as a wwan port, id.driver_data contains wwan port type */ port = wwan_create_port(parent, rpdev->id.driver_data, - &rpmsg_wwan_pops, rpwwan); + &rpmsg_wwan_pops, NULL, rpwwan); if (IS_ERR(port)) return PTR_ERR(port); diff --git a/drivers/net/wwan/t7xx/t7xx_port_wwan.c b/drivers/net/wwan/t7xx/t7xx_port_wwan.c index 24bd21942403e..17389c8f6600a 100644 --- a/drivers/net/wwan/t7xx/t7xx_port_wwan.c +++ b/drivers/net/wwan/t7xx/t7xx_port_wwan.c @@ -54,13 +54,13 @@ static void t7xx_port_ctrl_stop(struct wwan_port *port) static int t7xx_port_ctrl_tx(struct wwan_port *port, struct sk_buff *skb) { struct t7xx_port *port_private = wwan_port_get_drvdata(port); - size_t len, offset, chunk_len = 0, txq_mtu = CLDMA_MTU; const struct t7xx_port_conf *port_conf; + struct sk_buff *cur = skb, *cloned; struct t7xx_fsm_ctl *ctl; enum md_state md_state; + int cnt = 0, ret; - len = skb->len; - if (!len || !port_private->chan_enable) + if (!port_private->chan_enable) return -EINVAL; port_conf = port_private->port_conf; @@ -72,23 +72,21 @@ static int t7xx_port_ctrl_tx(struct wwan_port *port, struct sk_buff *skb) return -ENODEV; } - for (offset = 0; offset < len; offset += chunk_len) { - struct sk_buff *skb_ccci; - int ret; - - chunk_len = min(len - offset, txq_mtu - sizeof(struct ccci_header)); - skb_ccci = t7xx_port_alloc_skb(chunk_len); - if (!skb_ccci) - return -ENOMEM; - - skb_put_data(skb_ccci, skb->data + offset, chunk_len); - ret = t7xx_port_send_skb(port_private, skb_ccci, 0, 0); + while (cur) { + cloned = skb_clone(cur, 
GFP_KERNEL); + cloned->len = skb_headlen(cur); + ret = t7xx_port_send_skb(port_private, cloned, 0, 0); if (ret) { - dev_kfree_skb_any(skb_ccci); + dev_kfree_skb(cloned); dev_err(port_private->dev, "Write error on %s port, %d\n", port_conf->name, ret); - return ret; + return cnt ? cnt + ret : ret; } + cnt += cur->len; + if (cur == skb) + cur = skb_shinfo(skb)->frag_list; + else + cur = cur->next; } dev_kfree_skb(skb); @@ -154,13 +152,17 @@ static int t7xx_port_wwan_disable_chl(struct t7xx_port *port) static void t7xx_port_wwan_md_state_notify(struct t7xx_port *port, unsigned int state) { const struct t7xx_port_conf *port_conf = port->port_conf; + unsigned int header_len = sizeof(struct ccci_header); + struct wwan_port_caps caps; if (state != MD_STATE_READY) return; if (!port->wwan.wwan_port) { + caps.frag_len = CLDMA_MTU - header_len; + caps.headroom_len = header_len; port->wwan.wwan_port = wwan_create_port(port->dev, port_conf->port_type, - &wwan_ops, port); + &wwan_ops, &caps, port); if (IS_ERR(port->wwan.wwan_port)) dev_err(port->dev, "Unable to create WWWAN port %s", port_conf->name); } diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c index 966d0ccd2276b..2e1c01cf00a99 100644 --- a/drivers/net/wwan/wwan_core.c +++ b/drivers/net/wwan/wwan_core.c @@ -67,6 +67,8 @@ struct wwan_device { * @rxq: Buffer inbound queue * @waitqueue: The waitqueue for port fops (read/write/poll) * @data_lock: Port specific data access serialization + * @headroom_len: SKB reserved headroom size + * @frag_len: Length to fragment packet * @at_data: AT port specific data */ struct wwan_port { @@ -79,6 +81,8 @@ struct wwan_port { struct sk_buff_head rxq; wait_queue_head_t waitqueue; struct mutex data_lock; /* Port specific data access serialization */ + size_t headroom_len; + size_t frag_len; union { struct { struct ktermios termios; @@ -426,6 +430,7 @@ static int __wwan_port_dev_assign_name(struct wwan_port *port, const char *fmt) struct wwan_port *wwan_create_port(struct device *parent, enum wwan_port_type type, const struct wwan_port_ops *ops, + struct wwan_port_caps *caps, void *drvdata) { struct wwan_device *wwandev; @@ -459,6 +464,8 @@ struct wwan_port *wwan_create_port(struct device *parent, port->type = type; port->ops = ops; + port->frag_len = caps ? caps->frag_len : SIZE_MAX; + port->headroom_len = caps ? 
caps->headroom_len : 0; mutex_init(&port->ops_lock); skb_queue_head_init(&port->rxq); init_waitqueue_head(&port->waitqueue); @@ -702,30 +709,53 @@ static ssize_t wwan_port_fops_read(struct file *filp, char __user *buf, static ssize_t wwan_port_fops_write(struct file *filp, const char __user *buf, size_t count, loff_t *offp) { + struct sk_buff *skb, *head = NULL, *tail = NULL; struct wwan_port *port = filp->private_data; - struct sk_buff *skb; + size_t frag_len, remain = count; int ret; ret = wwan_wait_tx(port, !!(filp->f_flags & O_NONBLOCK)); if (ret) return ret; - skb = alloc_skb(count, GFP_KERNEL); - if (!skb) - return -ENOMEM; + do { + frag_len = min(remain, port->frag_len); + skb = alloc_skb(frag_len + port->headroom_len, GFP_KERNEL); + if (!skb) { + ret = -ENOMEM; + goto freeskb; + } + skb_reserve(skb, port->headroom_len); + + if (!head) { + head = skb; + } else if (!tail) { + skb_shinfo(head)->frag_list = skb; + tail = skb; + } else { + tail->next = skb; + tail = skb; + } - if (copy_from_user(skb_put(skb, count), buf, count)) { - kfree_skb(skb); - return -EFAULT; - } + if (copy_from_user(skb_put(skb, frag_len), buf + count - remain, frag_len)) { + ret = -EFAULT; + goto freeskb; + } - ret = wwan_port_op_tx(port, skb, !!(filp->f_flags & O_NONBLOCK)); - if (ret) { - kfree_skb(skb); - return ret; - } + if (skb != head) { + head->data_len += skb->len; + head->len += skb->len; + head->truesize += skb->truesize; + } + } while (remain -= frag_len); + + ret = wwan_port_op_tx(port, head, !!(filp->f_flags & O_NONBLOCK)); + if (!ret) + return count; - return count; +freeskb: + kfree_skb(head); + return ret; } static __poll_t wwan_port_fops_poll(struct file *filp, poll_table *wait) diff --git a/drivers/net/wwan/wwan_hwsim.c b/drivers/net/wwan/wwan_hwsim.c index 2397a903d8f54..dfbdaa259a3f0 100644 --- a/drivers/net/wwan/wwan_hwsim.c +++ b/drivers/net/wwan/wwan_hwsim.c @@ -205,7 +205,7 @@ static struct wwan_hwsim_port *wwan_hwsim_port_new(struct wwan_hwsim_dev *dev) port->wwan = wwan_create_port(&dev->dev, WWAN_PORT_AT, &wwan_hwsim_port_ops, - port); + NULL, port); if (IS_ERR(port->wwan)) { err = PTR_ERR(port->wwan); goto err_free_port; diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 1f0951be15ab7..c553decb54610 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -929,7 +929,8 @@ static void wdm_wwan_init(struct wdm_device *desc) return; } - port = wwan_create_port(&intf->dev, desc->wwanp_type, &wdm_wwan_port_ops, desc); + port = wwan_create_port(&intf->dev, desc->wwanp_type, &wdm_wwan_port_ops, + NULL, desc); if (IS_ERR(port)) { dev_err(&intf->dev, "%s: Unable to create WWAN port\n", dev_name(intf->usb_dev)); diff --git a/include/linux/wwan.h b/include/linux/wwan.h index 24d76500b1cc9..01fa15506286d 100644 --- a/include/linux/wwan.h +++ b/include/linux/wwan.h @@ -64,11 +64,21 @@ struct wwan_port_ops { poll_table *wait); }; +/** struct wwan_port_caps - The WWAN port capbilities + * @frag_len: WWAN port TX fragments length + * @headroom_len: WWAN port TX fragments reserved headroom length + */ +struct wwan_port_caps { + size_t frag_len; + unsigned int headroom_len; +}; + /** * wwan_create_port - Add a new WWAN port * @parent: Device to use as parent and shared by all WWAN ports * @type: WWAN port type * @ops: WWAN port operations + * @caps: WWAN port capabilities * @drvdata: Pointer to caller driver data * * Allocate and register a new WWAN port. 
The port will be automatically exposed @@ -86,6 +96,7 @@ struct wwan_port_ops { struct wwan_port *wwan_create_port(struct device *parent, enum wwan_port_type type, const struct wwan_port_ops *ops, + struct wwan_port_caps *caps, void *drvdata); /** -- GitLab From df5e87f16c338aa4f62ed3353e73e39e68af965e Mon Sep 17 00:00:00 2001 From: Gavin Li Date: Thu, 16 Mar 2023 09:07:54 +0200 Subject: [PATCH 0586/2078] vxlan: Remove unused argument from vxlan_build_gbp_hdr( ) and vxlan_build_gpe_hdr( ) Remove unused argument (i.e. u32 vxflags) in vxlan_build_gbp_hdr( ) and vxlan_build_gpe_hdr( ) function arguments. Signed-off-by: Gavin Li Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- drivers/net/vxlan/vxlan_core.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index e2e5f5dac7e63..8cc147c0d069c 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2093,8 +2093,7 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb) return false; } -static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags, - struct vxlan_metadata *md) +static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, struct vxlan_metadata *md) { struct vxlanhdr_gbp *gbp; @@ -2113,8 +2112,7 @@ static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags, gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK); } -static int vxlan_build_gpe_hdr(struct vxlanhdr *vxh, u32 vxflags, - __be16 protocol) +static int vxlan_build_gpe_hdr(struct vxlanhdr *vxh, __be16 protocol) { struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)vxh; @@ -2177,9 +2175,9 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst, } if (vxflags & VXLAN_F_GBP) - vxlan_build_gbp_hdr(vxh, vxflags, md); + vxlan_build_gbp_hdr(vxh, md); if (vxflags & VXLAN_F_GPE) { - err = vxlan_build_gpe_hdr(vxh, vxflags, skb->protocol); + err = vxlan_build_gpe_hdr(vxh, skb->protocol); if (err < 0) return err; inner_protocol = skb->protocol; -- GitLab From c641e9279f3530aa2fe4bcb250477b555b75104a Mon Sep 17 00:00:00 2001 From: Gavin Li Date: Thu, 16 Mar 2023 09:07:55 +0200 Subject: [PATCH 0587/2078] vxlan: Expose helper vxlan_build_gbp_hdr The function vxlan_build_gbp_hdr will be used by other modules to build gbp option in vxlan header according to gbp flags. 
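A caller outside the VXLAN core would use the helper roughly as follows (sketch only; the surrounding header-building code and the vni variable are hypothetical):

```
/* vxh points at the VXLAN header being built, md at the tunnel metadata */
vxh->vx_flags = VXLAN_HF_VNI;
vxh->vx_vni   = vxlan_vni_field(vni);
vxlan_build_gbp_hdr(vxh, md);	/* no-op unless md->gbp is set */
```
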
Signed-off-by: Gavin Li Reviewed-by: Gavi Teitz Reviewed-by: Roi Dayan Reviewed-by: Maor Dickman Acked-by: Saeed Mahameed Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- drivers/net/vxlan/vxlan_core.c | 19 ------------------- include/net/vxlan.h | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 8cc147c0d069c..561fe1b314f5f 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2093,25 +2093,6 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb) return false; } -static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, struct vxlan_metadata *md) -{ - struct vxlanhdr_gbp *gbp; - - if (!md->gbp) - return; - - gbp = (struct vxlanhdr_gbp *)vxh; - vxh->vx_flags |= VXLAN_HF_GBP; - - if (md->gbp & VXLAN_GBP_DONT_LEARN) - gbp->dont_learn = 1; - - if (md->gbp & VXLAN_GBP_POLICY_APPLIED) - gbp->policy_applied = 1; - - gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK); -} - static int vxlan_build_gpe_hdr(struct vxlanhdr *vxh, __be16 protocol) { struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)vxh; diff --git a/include/net/vxlan.h b/include/net/vxlan.h index b7b2e9abfb373..20bd7d893e10a 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -572,4 +572,23 @@ static inline bool vxlan_fdb_nh_path_select(struct nexthop *nh, return true; } +static inline void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, const struct vxlan_metadata *md) +{ + struct vxlanhdr_gbp *gbp; + + if (!md->gbp) + return; + + gbp = (struct vxlanhdr_gbp *)vxh; + vxh->vx_flags |= VXLAN_HF_GBP; + + if (md->gbp & VXLAN_GBP_DONT_LEARN) + gbp->dont_learn = 1; + + if (md->gbp & VXLAN_GBP_POLICY_APPLIED) + gbp->policy_applied = 1; + + gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK); +} + #endif -- GitLab From 58de53c1025836529af9e22e96375d0874e2baca Mon Sep 17 00:00:00 2001 From: Gavin Li Date: Thu, 16 Mar 2023 09:07:56 +0200 Subject: [PATCH 0588/2078] net/mlx5e: Add helper for encap_info_equal for tunnels with options For tunnels with options, eg, geneve and vxlan with gbp, they share the same way to compare the headers and options. Extract the code as a common function for them. 
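Each tunnel implementation can then shrink to a thin wrapper that passes its own option flag; sketched below for geneve and a hypothetical vxlan/GBP user (function names follow the existing mlx5e convention, but both snippets are illustrative only):

```
static bool mlx5e_tc_tun_encap_info_equal_geneve(struct mlx5e_encap_key *a,
						 struct mlx5e_encap_key *b)
{
	return mlx5e_tc_tun_encap_info_equal_options(a, b, TUNNEL_GENEVE_OPT);
}

/* hypothetical vxlan counterpart, keyed on the GBP option flag */
static bool mlx5e_tc_tun_encap_info_equal_vxlan(struct mlx5e_encap_key *a,
						struct mlx5e_encap_key *b)
{
	return mlx5e_tc_tun_encap_info_equal_options(a, b, TUNNEL_VXLAN_OPT);
}
```
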
Signed-off-by: Gavin Li Reviewed-by: Gavi Teitz Reviewed-by: Roi Dayan Reviewed-by: Maor Dickman Acked-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- .../ethernet/mellanox/mlx5/core/en/tc_tun.h | 3 ++ .../mellanox/mlx5/core/en/tc_tun_encap.c | 32 +++++++++++++++++++ .../mellanox/mlx5/core/en/tc_tun_geneve.c | 24 +------------- 3 files changed, 36 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h index b38f693bbb52d..92065568bb192 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h @@ -115,6 +115,9 @@ int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv, bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a, struct mlx5e_encap_key *b); +bool mlx5e_tc_tun_encap_info_equal_options(struct mlx5e_encap_key *a, + struct mlx5e_encap_key *b, + __be16 tun_flags); #endif /* CONFIG_MLX5_ESWITCH */ #endif //__MLX5_EN_TC_TUNNEL_H__ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 780224fd67a1d..a108e73c9f669 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -3,6 +3,7 @@ #include #include +#include #include "tc_tun_encap.h" #include "en_tc.h" #include "tc_tun.h" @@ -571,6 +572,37 @@ bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a, a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type; } +bool mlx5e_tc_tun_encap_info_equal_options(struct mlx5e_encap_key *a, + struct mlx5e_encap_key *b, + __be16 tun_flags) +{ + struct ip_tunnel_info *a_info; + struct ip_tunnel_info *b_info; + bool a_has_opts, b_has_opts; + + if (!mlx5e_tc_tun_encap_info_equal_generic(a, b)) + return false; + + a_has_opts = !!(a->ip_tun_key->tun_flags & tun_flags); + b_has_opts = !!(b->ip_tun_key->tun_flags & tun_flags); + + /* keys are equal when both don't have any options attached */ + if (!a_has_opts && !b_has_opts) + return true; + + if (a_has_opts != b_has_opts) + return false; + + /* options stored in memory next to ip_tunnel_info struct */ + a_info = container_of(a->ip_tun_key, struct ip_tunnel_info, key); + b_info = container_of(b->ip_tun_key, struct ip_tunnel_info, key); + + return a_info->options_len == b_info->options_len && + !memcmp(ip_tunnel_info_opts(a_info), + ip_tunnel_info_opts(b_info), + a_info->options_len); +} + static int cmp_decap_info(struct mlx5e_decap_key *a, struct mlx5e_decap_key *b) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c index 054d80c4e65cf..2bcd10b6d6537 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c @@ -337,29 +337,7 @@ static int mlx5e_tc_tun_parse_geneve(struct mlx5e_priv *priv, static bool mlx5e_tc_tun_encap_info_equal_geneve(struct mlx5e_encap_key *a, struct mlx5e_encap_key *b) { - struct ip_tunnel_info *a_info; - struct ip_tunnel_info *b_info; - bool a_has_opts, b_has_opts; - - if (!mlx5e_tc_tun_encap_info_equal_generic(a, b)) - return false; - - a_has_opts = !!(a->ip_tun_key->tun_flags & TUNNEL_GENEVE_OPT); - b_has_opts = !!(b->ip_tun_key->tun_flags & TUNNEL_GENEVE_OPT); - - /* keys are equal when both don't have any options attached */ - if (!a_has_opts && !b_has_opts) - return true; - - if (a_has_opts != b_has_opts) - return false; 
- - /* geneve options stored in memory next to ip_tunnel_info struct */ - a_info = container_of(a->ip_tun_key, struct ip_tunnel_info, key); - b_info = container_of(b->ip_tun_key, struct ip_tunnel_info, key); - - return a_info->options_len == b_info->options_len && - memcmp(a_info + 1, b_info + 1, a_info->options_len) == 0; + return mlx5e_tc_tun_encap_info_equal_options(a, b, TUNNEL_GENEVE_OPT); } struct mlx5e_tc_tunnel geneve_tunnel = { -- GitLab From bc9d003dc48c381763c3a6309bfc5eecf9962b9c Mon Sep 17 00:00:00 2001 From: Gavin Li Date: Thu, 16 Mar 2023 09:07:57 +0200 Subject: [PATCH 0589/2078] ip_tunnel: Preserve pointer const in ip_tunnel_info_opts Change ip_tunnel_info_opts( ) from static function to macro to cast return value and preserve the const-ness of the pointer. Signed-off-by: Gavin Li Reviewed-by: Eric Dumazet Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- include/net/ip_tunnels.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index fca3576798166..255b32a90850a 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -67,6 +67,12 @@ struct ip_tunnel_key { GENMASK((sizeof_field(struct ip_tunnel_info, \ options_len) * BITS_PER_BYTE) - 1, 0) +#define ip_tunnel_info_opts(info) \ + _Generic(info, \ + const struct ip_tunnel_info * : ((const void *)((info) + 1)),\ + struct ip_tunnel_info * : ((void *)((info) + 1))\ + ) + struct ip_tunnel_info { struct ip_tunnel_key key; #ifdef CONFIG_DST_CACHE @@ -485,11 +491,6 @@ static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len) } } -static inline void *ip_tunnel_info_opts(struct ip_tunnel_info *info) -{ - return info + 1; -} - static inline void ip_tunnel_info_opts_get(void *to, const struct ip_tunnel_info *info) { -- GitLab From 6ee44c518159c364e5d30ed85d357fe1d8e2c141 Mon Sep 17 00:00:00 2001 From: Gavin Li Date: Thu, 16 Mar 2023 09:07:58 +0200 Subject: [PATCH 0590/2078] net/mlx5e: TC, Add support for VxLAN GBP encap/decap flows offload Add HW offloading support for TC flows with VxLAN GBP encap/decap. Example of encap rule: tc filter add dev eth0 protocol ip ingress flower \ action tunnel_key set id 42 vxlan_opts 512 \ action mirred egress redirect dev vxlan1 Example of decap rule: tc filter add dev vxlan1 protocol ip ingress flower \ enc_key_id 42 enc_dst_port 4789 vxlan_opts 1024 \ action tunnel_key unset action mirred egress redirect dev eth0 Signed-off-by: Gavin Li Reviewed-by: Gavi Teitz Reviewed-by: Roi Dayan Reviewed-by: Maor Dickman Acked-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/en/tc_tun_vxlan.c | 72 ++++++++++++++++++- include/linux/mlx5/device.h | 6 ++ include/linux/mlx5/mlx5_ifc.h | 13 +++- 3 files changed, 87 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c index 1f62c702b6255..a184d739d5f86 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* Copyright (c) 2018 Mellanox Technologies. 
*/ +#include #include #include "lib/vxlan.h" #include "en/tc_tun.h" @@ -86,9 +87,11 @@ static int mlx5e_gen_ip_tunnel_header_vxlan(char buf[], const struct ip_tunnel_key *tun_key = &e->tun_info->key; __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); struct udphdr *udp = (struct udphdr *)(buf); + const struct vxlan_metadata *md; struct vxlanhdr *vxh; - if (tun_key->tun_flags & TUNNEL_VXLAN_OPT) + if ((tun_key->tun_flags & TUNNEL_VXLAN_OPT) && + e->tun_info->options_len != sizeof(*md)) return -EOPNOTSUPP; vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr)); *ip_proto = IPPROTO_UDP; @@ -96,6 +99,57 @@ static int mlx5e_gen_ip_tunnel_header_vxlan(char buf[], udp->dest = tun_key->tp_dst; vxh->vx_flags = VXLAN_HF_VNI; vxh->vx_vni = vxlan_vni_field(tun_id); + if (tun_key->tun_flags & TUNNEL_VXLAN_OPT) { + md = ip_tunnel_info_opts(e->tun_info); + vxlan_build_gbp_hdr(vxh, md); + } + + return 0; +} + +static int mlx5e_tc_tun_parse_vxlan_gbp_option(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_enc_opts enc_opts; + void *misc5_c, *misc5_v; + u32 *gbp, *gbp_mask; + + flow_rule_match_enc_opts(rule, &enc_opts); + + if (memchr_inv(&enc_opts.mask->data, 0, sizeof(enc_opts.mask->data)) && + !MLX5_CAP_ESW_FT_FIELD_SUPPORT_2(priv->mdev, tunnel_header_0_1)) { + NL_SET_ERR_MSG_MOD(extack, "Matching on VxLAN GBP is not supported"); + return -EOPNOTSUPP; + } + + if (enc_opts.key->dst_opt_type != TUNNEL_VXLAN_OPT) { + NL_SET_ERR_MSG_MOD(extack, "Wrong VxLAN option type: not GBP"); + return -EOPNOTSUPP; + } + + if (enc_opts.key->len != sizeof(*gbp) || + enc_opts.mask->len != sizeof(*gbp_mask)) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN GBP option/mask len is not 32 bits"); + return -EINVAL; + } + + gbp = (u32 *)&enc_opts.key->data[0]; + gbp_mask = (u32 *)&enc_opts.mask->data[0]; + + if (*gbp_mask & ~VXLAN_GBP_MASK) { + NL_SET_ERR_MSG_FMT_MOD(extack, "Wrong VxLAN GBP mask(0x%08X)\n", *gbp_mask); + return -EINVAL; + } + + misc5_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_5); + misc5_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_5); + MLX5_SET(fte_match_set_misc5, misc5_c, tunnel_header_0, *gbp_mask); + MLX5_SET(fte_match_set_misc5, misc5_v, tunnel_header_0, *gbp); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_5; return 0; } @@ -122,6 +176,14 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, if (!enc_keyid.mask->keyid) return 0; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS)) { + int err; + + err = mlx5e_tc_tun_parse_vxlan_gbp_option(priv, spec, f); + if (err) + return err; + } + /* match on VNI is required */ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, @@ -143,6 +205,12 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, return 0; } +static bool mlx5e_tc_tun_encap_info_equal_vxlan(struct mlx5e_encap_key *a, + struct mlx5e_encap_key *b) +{ + return mlx5e_tc_tun_encap_info_equal_options(a, b, TUNNEL_VXLAN_OPT); +} + static int mlx5e_tc_tun_get_remote_ifindex(struct net_device *mirred_dev) { const struct vxlan_dev *vxlan = netdev_priv(mirred_dev); @@ -160,6 +228,6 @@ struct mlx5e_tc_tunnel vxlan_tunnel = { .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_vxlan, .parse_udp_ports = mlx5e_tc_tun_parse_udp_ports_vxlan, .parse_tunnel = mlx5e_tc_tun_parse_vxlan, - .encap_info_equal = mlx5e_tc_tun_encap_info_equal_generic, + .encap_info_equal 
= mlx5e_tc_tun_encap_info_equal_vxlan, .get_remote_ifindex = mlx5e_tc_tun_get_remote_ifindex, }; diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 71b06ebad4024..af4dd536a52c6 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1357,6 +1357,12 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_ESW_INGRESS_ACL_MAX(mdev, cap) \ MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, flow_table_properties_esw_acl_ingress.cap) +#define MLX5_CAP_ESW_FT_FIELD_SUPPORT_2(mdev, cap) \ + MLX5_CAP_ESW_FLOWTABLE(mdev, ft_field_support_2_esw_fdb.cap) + +#define MLX5_CAP_ESW_FT_FIELD_SUPPORT_2_MAX(mdev, cap) \ + MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, ft_field_support_2_esw_fdb.cap) + #define MLX5_CAP_ESW(mdev, cap) \ MLX5_GET(e_switch_cap, \ mdev->caps.hca[MLX5_CAP_ESWITCH]->cur, cap) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index d2c164f0778cd..e47d6c58da356 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -404,10 +404,13 @@ struct mlx5_ifc_flow_table_fields_supported_bits { u8 metadata_reg_c_0[0x1]; }; +/* Table 2170 - Flow Table Fields Supported 2 Format */ struct mlx5_ifc_flow_table_fields_supported_2_bits { u8 reserved_at_0[0xe]; u8 bth_opcode[0x1]; - u8 reserved_at_f[0x11]; + u8 reserved_at_f[0x1]; + u8 tunnel_header_0_1[0x1]; + u8 reserved_at_11[0xf]; u8 reserved_at_20[0x60]; }; @@ -895,7 +898,13 @@ struct mlx5_ifc_flow_table_eswitch_cap_bits { struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_esw_acl_egress; - u8 reserved_at_800[0x1000]; + u8 reserved_at_800[0xC00]; + + struct mlx5_ifc_flow_table_fields_supported_2_bits ft_field_support_2_esw_fdb; + + struct mlx5_ifc_flow_table_fields_supported_2_bits ft_field_bitmask_support_2_esw_fdb; + + u8 reserved_at_1500[0x300]; u8 sw_steering_fdb_action_drop_icm_address_rx[0x40]; -- GitLab From 94c540fbfc80ef95ec398fc77da1920de2946edb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 17 Mar 2023 15:55:30 +0000 Subject: [PATCH 0591/2078] udp: preserve const qualifier in udp_sk() We can change udp_sk() to propagate const qualifier of its argument, thanks to container_of_const() This should avoid some potential errors caused by accidental (const -> not_const) promotion. Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/linux/udp.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/include/linux/udp.h b/include/linux/udp.h index a2892e151644e..43c1fb2d2c21a 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -97,10 +97,7 @@ struct udp_sock { #define UDP_MAX_SEGMENTS (1 << 6UL) -static inline struct udp_sock *udp_sk(const struct sock *sk) -{ - return (struct udp_sock *)sk; -} +#define udp_sk(ptr) container_of_const(ptr, struct udp_sock, inet.sk) static inline void udp_set_no_check6_tx(struct sock *sk, bool val) { -- GitLab From 68ac9a8b6e65c7cbbe96541353dab1b3f8de2043 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 17 Mar 2023 15:55:31 +0000 Subject: [PATCH 0592/2078] af_packet: preserve const qualifier in pkt_sk() We can change pkt_sk() to propagate const qualifier of its argument, thanks to container_of_const() This should avoid some potential errors caused by accidental (const -> not_const) promotion. Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- net/packet/internal.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/packet/internal.h b/net/packet/internal.h index 680703dbce5e0..e793e99646f1c 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -133,10 +133,7 @@ struct packet_sock { atomic_t tp_drops ____cacheline_aligned_in_smp; }; -static inline struct packet_sock *pkt_sk(struct sock *sk) -{ - return (struct packet_sock *)sk; -} +#define pkt_sk(ptr) container_of_const(ptr, struct packet_sock, sk) enum packet_sock_flags { PACKET_SOCK_ORIGDEV, -- GitLab From 0a2db4630b72486ec2f207ae433c2156b7fd3837 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 17 Mar 2023 15:55:32 +0000 Subject: [PATCH 0593/2078] raw: preserve const qualifier in raw_sk() We can change raw_sk() to propagate const qualifier of its argument, thanks to container_of_const() Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/net/raw.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/include/net/raw.h b/include/net/raw.h index 7ad15830cf384..c215af02f7589 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -83,10 +83,7 @@ struct raw_sock { u32 ipmr_table; }; -static inline struct raw_sock *raw_sk(const struct sock *sk) -{ - return (struct raw_sock *)sk; -} +#define raw_sk(ptr) container_of_const(ptr, struct raw_sock, inet.sk) static inline bool raw_sk_bound_dev_eq(struct net *net, int bound_dev_if, int dif, int sdif) -- GitLab From 47fcae28b9ec409423ba7f67f93e8345acce8a36 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 17 Mar 2023 15:55:33 +0000 Subject: [PATCH 0594/2078] ipv6: raw: preserve const qualifier in raw6_sk() We can change raw6_sk() to propagate its argument const qualifier, thanks to container_of_const(). Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/linux/ipv6.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 37dfdcfcdd542..839247a4f48ea 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -336,10 +336,7 @@ static inline struct ipv6_pinfo *inet6_sk(const struct sock *__sk) return sk_fullsock(__sk) ? inet_sk(__sk)->pinet6 : NULL; } -static inline struct raw6_sock *raw6_sk(const struct sock *sk) -{ - return (struct raw6_sock *)sk; -} +#define raw6_sk(ptr) container_of_const(ptr, struct raw6_sock, inet.sk) #define ipv6_only_sock(sk) (sk->sk_ipv6only) #define ipv6_sk_rxinfo(sk) ((sk)->sk_family == PF_INET6 && \ -- GitLab From ae6084b739925d84ef4a481d8eaa2187455f2e2f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 17 Mar 2023 15:55:34 +0000 Subject: [PATCH 0595/2078] dccp: preserve const qualifier in dccp_sk() We can change dccp_sk() to propagate its argument const qualifier, thanks to container_of_const(). Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- include/linux/dccp.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 07e547c02fd8b..325af611909f9 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -305,10 +305,8 @@ struct dccp_sock { struct timer_list dccps_xmit_timer; }; -static inline struct dccp_sock *dccp_sk(const struct sock *sk) -{ - return (struct dccp_sock *)sk; -} +#define dccp_sk(ptr) container_of_const(ptr, struct dccp_sock, \ + dccps_inet_connection.icsk_inet.sk) static inline const char *dccp_role(const struct sock *sk) { -- GitLab From b064ba9c3cfaf3d22d6153ec3c245eaa4d913674 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 17 Mar 2023 15:55:35 +0000 Subject: [PATCH 0596/2078] af_unix: preserve const qualifier in unix_sk() We can change unix_sk() to propagate its argument const qualifier, thanks to container_of_const(). We need to change dump_common_audit_data() 'struct unix_sock *u' local var to get a const attribute. Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/net/af_unix.h | 5 +---- security/lsm_audit.c | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 45ebde587138e..824c258143a3a 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -74,10 +74,7 @@ struct unix_sock { #endif }; -static inline struct unix_sock *unix_sk(const struct sock *sk) -{ - return (struct unix_sock *)sk; -} +#define unix_sk(ptr) container_of_const(ptr, struct unix_sock, sk) #define peer_wait peer_wq.wait diff --git a/security/lsm_audit.c b/security/lsm_audit.c index 00d3bdd386e29..368e77ca43c4a 100644 --- a/security/lsm_audit.c +++ b/security/lsm_audit.c @@ -310,7 +310,7 @@ static void dump_common_audit_data(struct audit_buffer *ab, case LSM_AUDIT_DATA_NET: if (a->u.net->sk) { const struct sock *sk = a->u.net->sk; - struct unix_sock *u; + const struct unix_sock *u; struct unix_address *addr; int len = 0; char *p = NULL; -- GitLab From 407db475d505d5aa4c2352b80e429a02f5ccd1e4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 17 Mar 2023 15:55:36 +0000 Subject: [PATCH 0597/2078] smc: preserve const qualifier in smc_sk() We can change smc_sk() to propagate its argument const qualifier, thanks to container_of_const(). Signed-off-by: Eric Dumazet Cc: Karsten Graul Cc: Wenjia Zhang Cc: Jan Karcher Reviewed-by: Simon Horman Reviewed-by: Wenjia Zhang Signed-off-by: David S. Miller --- net/smc/smc.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/smc/smc.h b/net/smc/smc.h index 5ed765ea0c731..2eeea4cdc7187 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -283,10 +283,7 @@ struct smc_sock { /* smc sock container */ * */ }; -static inline struct smc_sock *smc_sk(const struct sock *sk) -{ - return (struct smc_sock *)sk; -} +#define smc_sk(ptr) container_of_const(ptr, struct smc_sock, sk) static inline void smc_init_saved_callbacks(struct smc_sock *smc) { -- GitLab From c7154ca8e075cc456fe773879263b33ae307a59e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 17 Mar 2023 15:55:37 +0000 Subject: [PATCH 0598/2078] x25: preserve const qualifier in [a]x25_sk() We can change [a]x25_sk() to propagate their argument const qualifier, thanks to container_of_const(). Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- include/net/ax25.h | 5 +---- include/net/x25.h | 5 +---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/include/net/ax25.h b/include/net/ax25.h index f8cf3629a4193..0d939e5aee4ec 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -260,10 +260,7 @@ struct ax25_sock { struct ax25_cb *cb; }; -static inline struct ax25_sock *ax25_sk(const struct sock *sk) -{ - return (struct ax25_sock *) sk; -} +#define ax25_sk(ptr) container_of_const(ptr, struct ax25_sock, sk) static inline struct ax25_cb *sk_to_ax25(const struct sock *sk) { diff --git a/include/net/x25.h b/include/net/x25.h index d7d6c2b4ffa71..597eb53c471e3 100644 --- a/include/net/x25.h +++ b/include/net/x25.h @@ -177,10 +177,7 @@ struct x25_forward { atomic_t refcnt; }; -static inline struct x25_sock *x25_sk(const struct sock *sk) -{ - return (struct x25_sock *)sk; -} +#define x25_sk(ptr) container_of_const(ptr, struct x25_sock, sk) /* af_x25.c */ extern int sysctl_x25_restart_request_timeout; -- GitLab From 403a40f2304d4730a780ab9d6a2b93d1e4ac39d2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 17 Mar 2023 15:55:38 +0000 Subject: [PATCH 0599/2078] mptcp: preserve const qualifier in mptcp_sk() We can change mptcp_sk() to propagate its argument const qualifier, thanks to container_of_const(). We need to change few things to avoid build errors: mptcp_set_datafin_timeout() and mptcp_rtx_head() have to accept non-const sk pointers. @msk local variable in mptcp_pending_tail() must be const. Signed-off-by: Eric Dumazet Cc: Matthieu Baerts Reviewed-by: Simon Horman Reviewed-by: Matthieu Baerts Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 2 +- net/mptcp/protocol.h | 9 +++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 6738181b29771..2d26b91143733 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -459,7 +459,7 @@ static bool mptcp_pending_data_fin(struct sock *sk, u64 *seq) return false; } -static void mptcp_set_datafin_timeout(const struct sock *sk) +static void mptcp_set_datafin_timeout(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); u32 retransmits; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 339a6f0729898..e1310bc113bef 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -334,10 +334,7 @@ static inline void msk_owned_by_me(const struct mptcp_sock *msk) sock_owned_by_me((const struct sock *)msk); } -static inline struct mptcp_sock *mptcp_sk(const struct sock *sk) -{ - return (struct mptcp_sock *)sk; -} +#define mptcp_sk(ptr) container_of_const(ptr, struct mptcp_sock, sk.icsk_inet.sk) /* the msk socket don't use the backlog, also account for the bulk * free memory @@ -371,7 +368,7 @@ static inline struct mptcp_data_frag *mptcp_send_next(struct sock *sk) static inline struct mptcp_data_frag *mptcp_pending_tail(const struct sock *sk) { - struct mptcp_sock *msk = mptcp_sk(sk); + const struct mptcp_sock *msk = mptcp_sk(sk); if (!msk->first_pending) return NULL; @@ -382,7 +379,7 @@ static inline struct mptcp_data_frag *mptcp_pending_tail(const struct sock *sk) return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list); } -static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk) +static inline struct mptcp_data_frag *mptcp_rtx_head(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); -- GitLab From e9d9da91548b21e189fcd0259a0f2d26d1afc509 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 17 Mar 2023 15:55:39 +0000 Subject: 
[PATCH 0600/2078] tcp: preserve const qualifier in tcp_sk() We can change tcp_sk() to propagate its argument const qualifier, thanks to container_of_const(). We have two places where a const sock pointer has to be upgraded to a write one. We have been using const qualifier for lockless listeners to clearly identify points where writes could happen. Add tcp_sk_rw() helper to better document these. tcp_inbound_md5_hash(), __tcp_grow_window(), tcp_reset_check() and tcp_rack_reo_wnd() get an additional const qualififer for their @tp local variables. smc_check_reset_syn_req() also needs a similar change. Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/linux/tcp.h | 10 ++++++---- include/net/tcp.h | 2 +- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_input.c | 4 ++-- net/ipv4/tcp_minisocks.c | 5 +++-- net/ipv4/tcp_output.c | 9 +++++++-- net/ipv4/tcp_recovery.c | 2 +- 7 files changed, 21 insertions(+), 13 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index ca7f05a130d2d..b4c08ac869835 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -472,10 +472,12 @@ enum tsq_flags { TCPF_MTU_REDUCED_DEFERRED = (1UL << TCP_MTU_REDUCED_DEFERRED), }; -static inline struct tcp_sock *tcp_sk(const struct sock *sk) -{ - return (struct tcp_sock *)sk; -} +#define tcp_sk(ptr) container_of_const(ptr, struct tcp_sock, inet_conn.icsk_inet.sk) + +/* Variant of tcp_sk() upgrading a const sock to a read/write tcp socket. + * Used in context of (lockless) tcp listeners. + */ +#define tcp_sk_rw(ptr) container_of(ptr, struct tcp_sock, inet_conn.icsk_inet.sk) struct tcp_timewait_sock { struct inet_timewait_sock tw_sk; diff --git a/include/net/tcp.h b/include/net/tcp.h index db9f828e9d1ee..a0a91a9882727 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -529,7 +529,7 @@ static inline void tcp_synq_overflow(const struct sock *sk) last_overflow = READ_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp); if (!time_between32(now, last_overflow, last_overflow + HZ)) - WRITE_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp, now); + WRITE_ONCE(tcp_sk_rw(sk)->rx_opt.ts_recent_stamp, now); } /* syncookies: no recent synqueue overflow on this listening socket? */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 01569de651b65..fd68d49490f28 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4570,7 +4570,7 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, const __u8 *hash_location = NULL; struct tcp_md5sig_key *hash_expected; const struct tcphdr *th = tcp_hdr(skb); - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); int genhash, l3index; u8 newhash[16]; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 754ddbe0577f1..2b75cd9e2e92e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -458,7 +458,7 @@ static void tcp_sndbuf_expand(struct sock *sk) static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb, unsigned int skbtruesize) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); /* Optimize this! 
*/ int truesize = tcp_win_from_space(sk, skbtruesize) >> 1; int window = tcp_win_from_space(sk, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])) >> 1; @@ -5693,7 +5693,7 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *t */ static bool tcp_reset_check(const struct sock *sk, const struct sk_buff *skb) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); return unlikely(TCP_SKB_CB(skb)->seq == (tp->rcv_nxt - 1) && (1 << sk->sk_state) & (TCPF_CLOSE_WAIT | TCPF_LAST_ACK | diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 9a7ef7732c24c..dac0d62120e62 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -463,7 +463,7 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst) } EXPORT_SYMBOL_GPL(tcp_ca_openreq_child); -static void smc_check_reset_syn_req(struct tcp_sock *oldtp, +static void smc_check_reset_syn_req(const struct tcp_sock *oldtp, struct request_sock *req, struct tcp_sock *newtp) { @@ -492,7 +492,8 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, const struct inet_request_sock *ireq = inet_rsk(req); struct tcp_request_sock *treq = tcp_rsk(req); struct inet_connection_sock *newicsk; - struct tcp_sock *oldtp, *newtp; + const struct tcp_sock *oldtp; + struct tcp_sock *newtp; u32 seq; if (!newsk) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b1e743b39a83c..cfe128b81a010 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -4127,8 +4127,13 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) if (!res) { TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); - if (unlikely(tcp_passive_fastopen(sk))) - tcp_sk(sk)->total_retrans++; + if (unlikely(tcp_passive_fastopen(sk))) { + /* sk has const attribute because listeners are lockless. + * However in this case, we are dealing with a passive fastopen + * socket thus we can change total_retrans value. + */ + tcp_sk_rw(sk)->total_retrans++; + } trace_tcp_retransmit_synack(sk, req); } return res; diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index 50abaa941387d..acf4869c5d3b5 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -4,7 +4,7 @@ static u32 tcp_rack_reo_wnd(const struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); if (!tp->reord_seen) { /* If reordering has not been observed, be aggressive during -- GitLab From 08ff1c9f3e927ba3701c113dda70953a6f4afffa Mon Sep 17 00:00:00 2001 From: Sreevani Sreejith Date: Wed, 15 Mar 2023 12:54:05 -0700 Subject: [PATCH 0601/2078] bpf, docs: Libbpf overview documentation This patch documents overview of libbpf, including its features for developing BPF programs. Signed-off-by: Sreevani Sreejith Signed-off-by: Andrii Nakryiko Acked-by: David Vernet Link: https://lore.kernel.org/bpf/20230315195405.2051559-1-ssreevani@meta.com --- Documentation/bpf/libbpf/index.rst | 25 +- Documentation/bpf/libbpf/libbpf_overview.rst | 228 +++++++++++++++++++ 2 files changed, 245 insertions(+), 8 deletions(-) create mode 100644 Documentation/bpf/libbpf/libbpf_overview.rst diff --git a/Documentation/bpf/libbpf/index.rst b/Documentation/bpf/libbpf/index.rst index f9b3b252e28f4..7545a20496929 100644 --- a/Documentation/bpf/libbpf/index.rst +++ b/Documentation/bpf/libbpf/index.rst @@ -2,23 +2,32 @@ .. 
_libbpf: +====== libbpf ====== +If you are looking to develop BPF applications using the libbpf library, this +directory contains important documentation that you should read. + +To get started, it is recommended to begin with the :doc:`libbpf Overview +` document, which provides a high-level understanding of the +libbpf APIs and their usage. This will give you a solid foundation to start +exploring and utilizing the various features of libbpf to develop your BPF +applications. + .. toctree:: :maxdepth: 1 + libbpf_overview API Documentation program_types libbpf_naming_convention libbpf_build -This is documentation for libbpf, a userspace library for loading and -interacting with bpf programs. -All general BPF questions, including kernel functionality, libbpf APIs and -their application, should be sent to bpf@vger.kernel.org mailing list. -You can `subscribe `_ to the -mailing list search its `archive `_. -Please search the archive before asking new questions. It very well might -be that this was already addressed or answered before. +All general BPF questions, including kernel functionality, libbpf APIs and their +application, should be sent to bpf@vger.kernel.org mailing list. You can +`subscribe `_ to the mailing list +search its `archive `_. Please search the archive +before asking new questions. It may be that this was already addressed or +answered before. diff --git a/Documentation/bpf/libbpf/libbpf_overview.rst b/Documentation/bpf/libbpf/libbpf_overview.rst new file mode 100644 index 0000000000000..f36a2d4ffea2b --- /dev/null +++ b/Documentation/bpf/libbpf/libbpf_overview.rst @@ -0,0 +1,228 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=============== +libbpf Overview +=============== + +libbpf is a C-based library containing a BPF loader that takes compiled BPF +object files and prepares and loads them into the Linux kernel. libbpf takes the +heavy lifting of loading, verifying, and attaching BPF programs to various +kernel hooks, allowing BPF application developers to focus only on BPF program +correctness and performance. + +The following are the high-level features supported by libbpf: + +* Provides high-level and low-level APIs for user space programs to interact + with BPF programs. The low-level APIs wrap all the bpf system call + functionality, which is useful when users need more fine-grained control + over the interactions between user space and BPF programs. +* Provides overall support for the BPF object skeleton generated by bpftool. + The skeleton file simplifies the process for the user space programs to access + global variables and work with BPF programs. +* Provides BPF-side APIS, including BPF helper definitions, BPF maps support, + and tracing helpers, allowing developers to simplify BPF code writing. +* Supports BPF CO-RE mechanism, enabling BPF developers to write portable + BPF programs that can be compiled once and run across different kernel + versions. + +This document will delve into the above concepts in detail, providing a deeper +understanding of the capabilities and advantages of libbpf and how it can help +you develop BPF applications efficiently. + +BPF App Lifecycle and libbpf APIs +================================== + +A BPF application consists of one or more BPF programs (either cooperating or +completely independent), BPF maps, and global variables. The global +variables are shared between all BPF programs, which allows them to cooperate on +a common set of data. 
libbpf provides APIs that user space programs can use to +manipulate the BPF programs by triggering different phases of a BPF application +lifecycle. + +The following section provides a brief overview of each phase in the BPF life +cycle: + +* **Open phase**: In this phase, libbpf parses the BPF + object file and discovers BPF maps, BPF programs, and global variables. After + a BPF app is opened, user space apps can make additional adjustments + (setting BPF program types, if necessary; pre-setting initial values for + global variables, etc.) before all the entities are created and loaded. + +* **Load phase**: In the load phase, libbpf creates BPF + maps, resolves various relocations, and verifies and loads BPF programs into + the kernel. At this point, libbpf validates all the parts of a BPF application + and loads the BPF program into the kernel, but no BPF program has yet been + executed. After the load phase, it’s possible to set up the initial BPF map + state without racing with the BPF program code execution. + +* **Attachment phase**: In this phase, libbpf + attaches BPF programs to various BPF hook points (e.g., tracepoints, kprobes, + cgroup hooks, network packet processing pipeline, etc.). During this + phase, BPF programs perform useful work such as processing + packets, or updating BPF maps and global variables that can be read from user + space. + +* **Tear down phase**: In the tear down phase, + libbpf detaches BPF programs and unloads them from the kernel. BPF maps are + destroyed, and all the resources used by the BPF app are freed. + +BPF Object Skeleton File +======================== + +BPF skeleton is an alternative interface to libbpf APIs for working with BPF +objects. Skeleton code abstract away generic libbpf APIs to significantly +simplify code for manipulating BPF programs from user space. Skeleton code +includes a bytecode representation of the BPF object file, simplifying the +process of distributing your BPF code. With BPF bytecode embedded, there are no +extra files to deploy along with your application binary. + +You can generate the skeleton header file ``(.skel.h)`` for a specific object +file by passing the BPF object to the bpftool. The generated BPF skeleton +provides the following custom functions that correspond to the BPF lifecycle, +each of them prefixed with the specific object name: + +* ``__open()`` – creates and opens BPF application (```` stands for + the specific bpf object name) +* ``__load()`` – instantiates, loads,and verifies BPF application parts +* ``__attach()`` – attaches all auto-attachable BPF programs (it’s + optional, you can have more control by using libbpf APIs directly) +* ``__destroy()`` – detaches all BPF programs and + frees up all used resources + +Using the skeleton code is the recommended way to work with bpf programs. Keep +in mind, BPF skeleton provides access to the underlying BPF object, so whatever +was possible to do with generic libbpf APIs is still possible even when the BPF +skeleton is used. It's an additive convenience feature, with no syscalls, and no +cumbersome code. + +Other Advantages of Using Skeleton File +--------------------------------------- + +* BPF skeleton provides an interface for user space programs to work with BPF + global variables. The skeleton code memory maps global variables as a struct + into user space. The struct interface allows user space programs to initialize + BPF programs before the BPF load phase and fetch and update data from user + space afterward. 
+ +* The ``skel.h`` file reflects the object file structure by listing out the + available maps, programs, etc. BPF skeleton provides direct access to all the + BPF maps and BPF programs as struct fields. This eliminates the need for + string-based lookups with ``bpf_object_find_map_by_name()`` and + ``bpf_object_find_program_by_name()`` APIs, reducing errors due to BPF source + code and user-space code getting out of sync. + +* The embedded bytecode representation of the object file ensures that the + skeleton and the BPF object file are always in sync. + +BPF Helpers +=========== + +libbpf provides BPF-side APIs that BPF programs can use to interact with the +system. The BPF helpers definition allows developers to use them in BPF code as +any other plain C function. For example, there are helper functions to print +debugging messages, get the time since the system was booted, interact with BPF +maps, manipulate network packets, etc. + +For a complete description of what the helpers do, the arguments they take, and +the return value, see the `bpf-helpers +`_ man page. + +BPF CO-RE (Compile Once – Run Everywhere) +========================================= + +BPF programs work in the kernel space and have access to kernel memory and data +structures. One limitation that BPF applications come across is the lack of +portability across different kernel versions and configurations. `BCC +`_ is one of the solutions for BPF +portability. However, it comes with runtime overhead and a large binary size +from embedding the compiler with the application. + +libbpf steps up the BPF program portability by supporting the BPF CO-RE concept. +BPF CO-RE brings together BTF type information, libbpf, and the compiler to +produce a single executable binary that you can run on multiple kernel versions +and configurations. + +To make BPF programs portable libbpf relies on the BTF type information of the +running kernel. Kernel also exposes this self-describing authoritative BTF +information through ``sysfs`` at ``/sys/kernel/btf/vmlinux``. + +You can generate the BTF information for the running kernel with the following +command: + +:: + + $ bpftool btf dump file /sys/kernel/btf/vmlinux format c > vmlinux.h + +The command generates a ``vmlinux.h`` header file with all kernel types +(:doc:`BTF types <../btf>`) that the running kernel uses. Including +``vmlinux.h`` in your BPF program eliminates dependency on system-wide kernel +headers. + +libbpf enables portability of BPF programs by looking at the BPF program’s +recorded BTF type and relocation information and matching them to BTF +information (vmlinux) provided by the running kernel. libbpf then resolves and +matches all the types and fields, and updates necessary offsets and other +relocatable data to ensure that BPF program’s logic functions correctly for a +specific kernel on the host. BPF CO-RE concept thus eliminates overhead +associated with BPF development and allows developers to write portable BPF +applications without modifications and runtime source code compilation on the +target machine. + +The following code snippet shows how to read the parent field of a kernel +``task_struct`` using BPF CO-RE and libbf. The basic helper to read a field in a +CO-RE relocatable manner is ``bpf_core_read(dst, sz, src)``, which will read +``sz`` bytes from the field referenced by ``src`` into the memory pointed to by +``dst``. + +.. code-block:: C + :emphasize-lines: 6 + + //... 
+ struct task_struct *task = (void *)bpf_get_current_task(); + struct task_struct *parent_task; + int err; + + err = bpf_core_read(&parent_task, sizeof(void *), &task->parent); + if (err) { + /* handle error */ + } + + /* parent_task contains the value of task->parent pointer */ + +In the code snippet, we first get a pointer to the current ``task_struct`` using +``bpf_get_current_task()``. We then use ``bpf_core_read()`` to read the parent +field of task struct into the ``parent_task`` variable. ``bpf_core_read()`` is +just like ``bpf_probe_read_kernel()`` BPF helper, except it records information +about the field that should be relocated on the target kernel. i.e, if the +``parent`` field gets shifted to a different offset within +``struct task_struct`` due to some new field added in front of it, libbpf will +automatically adjust the actual offset to the proper value. + +Getting Started with libbpf +=========================== + +Check out the `libbpf-bootstrap `_ +repository with simple examples of using libbpf to build various BPF +applications. + +See also `libbpf API documentation +`_. + +libbpf and Rust +=============== + +If you are building BPF applications in Rust, it is recommended to use the +`Libbpf-rs `_ library instead of bindgen +bindings directly to libbpf. Libbpf-rs wraps libbpf functionality in +Rust-idiomatic interfaces and provides libbpf-cargo plugin to handle BPF code +compilation and skeleton generation. Using Libbpf-rs will make building user +space part of the BPF application easier. Note that the BPF program themselves +must still be written in plain C. + +Additional Documentation +======================== + +* `Program types and ELF Sections `_ +* `API naming convention `_ +* `Building libbpf `_ +* `API documentation Convention `_ -- GitLab From b31587feaa0133b19a1900a26bce07a3c6d21af6 Mon Sep 17 00:00:00 2001 From: Bartosz Wawrzyniak Date: Thu, 16 Mar 2023 10:03:39 +0000 Subject: [PATCH 0602/2078] net: macb: Set MDIO clock divisor for pclk higher than 160MHz Currently macb sets clock divisor for pclk up to 160 MHz. Function gem_mdc_clk_div was updated to enable divisor for higher values of pclk. Signed-off-by: Bartosz Wawrzyniak Reviewed-by: Michal Kubiak Acked-by: Nicolas Ferre Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- drivers/net/ethernet/cadence/macb.h | 2 ++ drivers/net/ethernet/cadence/macb_main.c | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 14dfec4db8f9e..c1fc91c97cee4 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -692,6 +692,8 @@ #define GEM_CLK_DIV48 3 #define GEM_CLK_DIV64 4 #define GEM_CLK_DIV96 5 +#define GEM_CLK_DIV128 6 +#define GEM_CLK_DIV224 7 /* Constants for MAN register */ #define MACB_MAN_C22_SOF 1 diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index c4edd20c1c668..7698719b19096 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -2640,8 +2640,12 @@ static u32 gem_mdc_clk_div(struct macb *bp) config = GEM_BF(CLK, GEM_CLK_DIV48); else if (pclk_hz <= 160000000) config = GEM_BF(CLK, GEM_CLK_DIV64); - else + else if (pclk_hz <= 240000000) config = GEM_BF(CLK, GEM_CLK_DIV96); + else if (pclk_hz <= 320000000) + config = GEM_BF(CLK, GEM_CLK_DIV128); + else + config = GEM_BF(CLK, GEM_CLK_DIV224); return config; } -- GitLab From c0e906a953f03c56d8029b0207195ee202fd646d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 16 Mar 2023 14:04:19 +0200 Subject: [PATCH 0603/2078] net: smc91x: Replace of_gpio.h with what indeed is used of_gpio.h in this driver is solely used as a proxy to other headers. This is incorrect usage of the of_gpio.h. Replace it .h with what indeed is used in the code. Signed-off-by: Andy Shevchenko Signed-off-by: David S. Miller --- drivers/net/ethernet/smsc/smc91x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 35e99bf0c4015..032eccf8eb428 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -57,6 +57,7 @@ static const char version[] = #include #include #include +#include #include #include #include @@ -69,7 +70,6 @@ static const char version[] = #include #include #include -#include #include #include -- GitLab From a593a2fcfdfb92cfd0ffc54bc81b07e6bfaaaf46 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 16 Mar 2023 14:08:26 +0200 Subject: [PATCH 0604/2078] net: phy: at803x: Replace of_gpio.h with what indeed is used of_gpio.h in this driver is solely used as a proxy to other headers. This is incorrect usage of the of_gpio.h. Replace it .h with what indeed is used in the code. Signed-off-by: Andy Shevchenko Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/at803x.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index 22f4458274aae..656136628ffd8 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -13,12 +13,11 @@ #include #include #include -#include #include -#include #include #include #include +#include #include #include #include -- GitLab From e485f3a6eae0849f83b94936778a2325f72a0c89 Mon Sep 17 00:00:00 2001 From: Tony Nguyen Date: Fri, 17 Mar 2023 13:09:03 -0700 Subject: [PATCH 0605/2078] ixgb: Remove ixgb driver There are likely no users of this driver as the hardware has been discontinued since 2010. Remove the driver and all references to it in documentation. Suggested-by: Jakub Kicinski Signed-off-by: Tony Nguyen Acked-by: Jesse Brandeburg Signed-off-by: David S. 
Miller --- Documentation/PCI/pci-error-recovery.rst | 1 - .../device_drivers/ethernet/index.rst | 1 - .../device_drivers/ethernet/intel/ixgb.rst | 468 ---- arch/loongarch/configs/loongson3_defconfig | 1 - arch/mips/configs/loongson2k_defconfig | 1 - arch/mips/configs/loongson3_defconfig | 1 - arch/mips/configs/mtx1_defconfig | 1 - arch/powerpc/configs/powernv_defconfig | 1 - arch/powerpc/configs/ppc64_defconfig | 1 - arch/powerpc/configs/ppc64e_defconfig | 1 - arch/powerpc/configs/ppc6xx_defconfig | 1 - arch/powerpc/configs/pseries_defconfig | 1 - arch/powerpc/configs/skiroot_defconfig | 1 - drivers/net/ethernet/intel/Kconfig | 17 - drivers/net/ethernet/intel/Makefile | 1 - drivers/net/ethernet/intel/ixgb/Makefile | 9 - drivers/net/ethernet/intel/ixgb/ixgb.h | 179 -- drivers/net/ethernet/intel/ixgb/ixgb_ee.c | 580 ----- drivers/net/ethernet/intel/ixgb/ixgb_ee.h | 79 - .../net/ethernet/intel/ixgb/ixgb_ethtool.c | 642 ----- drivers/net/ethernet/intel/ixgb/ixgb_hw.c | 1229 --------- drivers/net/ethernet/intel/ixgb/ixgb_hw.h | 767 ------ drivers/net/ethernet/intel/ixgb/ixgb_ids.h | 23 - drivers/net/ethernet/intel/ixgb/ixgb_main.c | 2285 ----------------- drivers/net/ethernet/intel/ixgb/ixgb_osdep.h | 39 - drivers/net/ethernet/intel/ixgb/ixgb_param.c | 442 ---- 26 files changed, 6772 deletions(-) delete mode 100644 Documentation/networking/device_drivers/ethernet/intel/ixgb.rst delete mode 100644 drivers/net/ethernet/intel/ixgb/Makefile delete mode 100644 drivers/net/ethernet/intel/ixgb/ixgb.h delete mode 100644 drivers/net/ethernet/intel/ixgb/ixgb_ee.c delete mode 100644 drivers/net/ethernet/intel/ixgb/ixgb_ee.h delete mode 100644 drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c delete mode 100644 drivers/net/ethernet/intel/ixgb/ixgb_hw.c delete mode 100644 drivers/net/ethernet/intel/ixgb/ixgb_hw.h delete mode 100644 drivers/net/ethernet/intel/ixgb/ixgb_ids.h delete mode 100644 drivers/net/ethernet/intel/ixgb/ixgb_main.c delete mode 100644 drivers/net/ethernet/intel/ixgb/ixgb_osdep.h delete mode 100644 drivers/net/ethernet/intel/ixgb/ixgb_param.c diff --git a/Documentation/PCI/pci-error-recovery.rst b/Documentation/PCI/pci-error-recovery.rst index bdafeb4b66dcf..9981d330da8f0 100644 --- a/Documentation/PCI/pci-error-recovery.rst +++ b/Documentation/PCI/pci-error-recovery.rst @@ -418,7 +418,6 @@ That is, the recovery API only requires that: - drivers/next/e100.c - drivers/net/e1000 - drivers/net/e1000e - - drivers/net/ixgb - drivers/net/ixgbe - drivers/net/cxgb3 - drivers/net/s2io.c diff --git a/Documentation/networking/device_drivers/ethernet/index.rst b/Documentation/networking/device_drivers/ethernet/index.rst index 392969ac88ad8..6e9e7012d000c 100644 --- a/Documentation/networking/device_drivers/ethernet/index.rst +++ b/Documentation/networking/device_drivers/ethernet/index.rst @@ -31,7 +31,6 @@ Contents: intel/fm10k intel/igb intel/igbvf - intel/ixgb intel/ixgbe intel/ixgbevf intel/i40e diff --git a/Documentation/networking/device_drivers/ethernet/intel/ixgb.rst b/Documentation/networking/device_drivers/ethernet/intel/ixgb.rst deleted file mode 100644 index c6a233e68ad6c..0000000000000 --- a/Documentation/networking/device_drivers/ethernet/intel/ixgb.rst +++ /dev/null @@ -1,468 +0,0 @@ -.. 
SPDX-License-Identifier: GPL-2.0+ - -===================================================================== -Linux Base Driver for 10 Gigabit Intel(R) Ethernet Network Connection -===================================================================== - -October 1, 2018 - - -Contents -======== - -- In This Release -- Identifying Your Adapter -- Command Line Parameters -- Improving Performance -- Additional Configurations -- Known Issues/Troubleshooting -- Support - - - -In This Release -=============== - -This file describes the ixgb Linux Base Driver for the 10 Gigabit Intel(R) -Network Connection. This driver includes support for Itanium(R)2-based -systems. - -For questions related to hardware requirements, refer to the documentation -supplied with your 10 Gigabit adapter. All hardware requirements listed apply -to use with Linux. - -The following features are available in this kernel: - - Native VLANs - - Channel Bonding (teaming) - - SNMP - -Channel Bonding documentation can be found in the Linux kernel source: -/Documentation/networking/bonding.rst - -The driver information previously displayed in the /proc filesystem is not -supported in this release. Alternatively, you can use ethtool (version 1.6 -or later), lspci, and iproute2 to obtain the same information. - -Instructions on updating ethtool can be found in the section "Additional -Configurations" later in this document. - - -Identifying Your Adapter -======================== - -The following Intel network adapters are compatible with the drivers in this -release: - -+------------+------------------------------+----------------------------------+ -| Controller | Adapter Name | Physical Layer | -+============+==============================+==================================+ -| 82597EX | Intel(R) PRO/10GbE LR/SR/CX4 | - 10G Base-LR (fiber) | -| | Server Adapters | - 10G Base-SR (fiber) | -| | | - 10G Base-CX4 (copper) | -+------------+------------------------------+----------------------------------+ - -For more information on how to identify your adapter, go to the Adapter & -Driver ID Guide at: - - https://support.intel.com - - -Command Line Parameters -======================= - -If the driver is built as a module, the following optional parameters are -used by entering them on the command line with the modprobe command using -this syntax:: - - modprobe ixgb [